######################################################################################################
## NOTE:
# line 36--280 computes Tables and Figures in Manuscript (first all tables, followed by all figures)
# line 282--2359 computes Tables and Figures in Appendix (first all tables, followed by all figures)
######################################################################################################


rm(list = ls())

# set working directory to YOUR PATH TO REPLICATION MATERIALS
setwd('~/PATH TO REPLICATION MATERIALS/ZG_replication_materials/')

#### *NOTE*: 'gridExtra', 'quantreg' need to be installed before installing RATest
# Note: install RATest version 0.1.9 as indicated below
install.packages('gridExtra')
install.packages('quantreg')
packageurl <- "https://cran.r-project.org/src/contrib/Archive/RATest/RATest_0.1.9.tar.gz"
install.packages(packageurl, repos=NULL, type="source")

#### *NOTE*: install rdrobust version 2.0.3 as indicated below
packageurl <- "https://cran.r-project.org/src/contrib/Archive/rdrobust/rdrobust_2.0.3.tar.gz"
install.packages(packageurl, repos=NULL, type="source")

## load required packages
library(data.table)
library(xtable)
library(ggplot2)
library(sf)
library(rdrobust)
library(estimatr)
library(RATest)
library(rddensity)



############################################
## Compute Tables and Figures in Manuscript
###########################################


# load final data for analysis
data <- fread('./data/clean/data_main_analysis.csv')

### Compute Table 1: Summary Statistics

# compute variable indicating whether minority won
data[,winner:=ifelse(minority_victory_margin>0,1,0)]
# change ethnicity_minority from percentage to proportion for consistency with other variables in Table 1
data[,ethnicity_minority_p:=ethnicity_minority/100]
# define vector of demographic/candidate characteristics
vars <- c('ethnicity_minority_p', 'gradea_c1', 'deprivation1', 'density', 'candidates_Conservative', 'candidates_Labour', 'incumbent')

# compute summary stats
mean <- sapply(data[, c('turnout','share_t1', 'victory_t1', 'share_non_coethnic_opposition_t1', 'eff_num_parties', 'minority_victory_margin', 'winner', vars), with=F], mean, na.rm=TRUE)
min <- sapply(data[, c('turnout', 'share_t1', 'victory_t1', 'share_non_coethnic_opposition_t1', 'eff_num_parties', 'minority_victory_margin', 'winner', vars), with=F], min, na.rm=TRUE)
max <- sapply(data[, c('turnout', 'share_t1', 'victory_t1', 'share_non_coethnic_opposition_t1', 'eff_num_parties', 'minority_victory_margin', 'winner', vars), with=F], max, na.rm=TRUE)
sd <- sapply(data[, c('turnout', 'share_t1', 'victory_t1', 'share_non_coethnic_opposition_t1', 'eff_num_parties', 'minority_victory_margin', 'winner', vars), with=F], sd, na.rm=TRUE)
N <- sapply(data[, c('turnout', 'share_t1', 'victory_t1', 'share_non_coethnic_opposition_t1', 'eff_num_parties', 'minority_victory_margin', 'winner', vars), with=F], function(x) sum(!is.na(x)))

# create labels for table
vars_labels <- c('% ethnic minority','% high income', '% low econ deprivation', 'pop. density',
                  'candidate: Conservative', 'candidate: Labour', 'candidate: incumbent')

# create table
stats_table <- data.frame(variable=c('turnout rate', "incumbent party vote share", "incumbent party prob. victory",
                                     "opponent party vote share", "effective num. parties",
                                     'minority victory margin', 'minority win', vars_labels),
                          mean = unname(mean),
                          sd = unname(sd),
                          min = unname(min),
                          max = unname(max),
                          N = unname(N))

# create footnotes for table
comment <- list(pos = list(0), command = NULL)
comment$pos[[1]] <- c(nrow(stats_table))
comment$command <- c(paste("\\hline\n",
                           "{\\footnotesize Notes: \\emph{N} is number of constituency-election years.}\n", sep = ""))

# print table
print(xtable(stats_table,
      align = "llccccc",       
      caption = "Summary Statistics",
      label = "table:summary_stats"),
      digits = c(0,0,2,2,2,2,0),
      include.rownames=FALSE,
      caption.placement = "top",
      add.to.row = comment,
      timestamp = NULL,
      file='./output/table1.tex')


### Compute Table 2: Ethnic Minority Representation Effects on Turnout

# rename data
turnout <- data
# define vector of covariates
covars <- c('ethnicity_minority', 'gradea_c1', 'deprivation1', 'density', 'candidates_Labour', 'incumbent')


# Compute RD estimates with all constituencies
main <- rdrobust(turnout[, turnout],
                 turnout[, minority_victory_margin],
                 p = 1,
                 kernel = "triangular",
                 bwselect = "mserd")



main_control <- rdrobust(turnout[, turnout],
                         turnout[, minority_victory_margin],
                         covs = turnout[, covars, with = FALSE],
                         p = 1,
                         kernel = "triangular",
                         bwselect = "mserd")

# define majority-minority, majority-white based on share of ethnic minority population
turnout_majority <- turnout[ethnicity_minority<20]
turnout_minority <- turnout[ethnicity_minority>=20]

# Compute RD estimates with majority-white constituencies
main_maj <- rdrobust(turnout_majority[, turnout],
                     turnout_majority[, minority_victory_margin],
                     p = 1,
                     kernel = "triangular",
                     bwselect = "mserd")

main_maj_control <- rdrobust(turnout_majority[, turnout],
                             turnout_majority[, minority_victory_margin],
                             covs = turnout_majority[, covars, with = FALSE],
                             p = 1,
                             kernel = "triangular",
                             bwselect = "mserd")

# Compute RD estimates with majority-minority constituencies
main_min <- rdrobust(turnout_minority[, turnout],
                     turnout_minority[, minority_victory_margin],
                     p = 1,
                     kernel = "triangular",
                     bwselect = "mserd")

main_min_control <- rdrobust(turnout_minority[, turnout],
                             turnout_minority[, minority_victory_margin],
                             covs = turnout_minority[, covars, with = FALSE],
                             p = 1,
                             kernel = "triangular",
                             bwselect = "mserd")


# Compute R-squared of models to include in table
data_r <- turnout[(abs(minority_victory_margin) <= main$bws[1])]
data_r[,weight:= (1-abs((minority_victory_margin/main$bws[1])))]
data_r[,treat:=ifelse(minority_victory_margin>0,1,0)]

out_main <- lm_robust(data_r[ , turnout] ~ data_r[,treat]*data_r[,minority_victory_margin],     
                                weights = data_r[,weight])


data_r <- turnout[(abs(minority_victory_margin) <= main_control$bws[1])]
data_r[,weight:= (1-abs((minority_victory_margin/main_control$bws[1])))]
data_r[,treat:=ifelse(minority_victory_margin>0,1,0)]
covs <- data_r[, covars, with=FALSE]

out_main_control <- lm_robust(data_r[ , turnout] ~ data_r[,treat]*data_r[,minority_victory_margin] + as.matrix(covs),     
                                        weights = data_r[,weight])

data_r <- turnout_majority[(abs(minority_victory_margin) <= main_maj$bws[1])]
data_r[,weight:= (1-abs((minority_victory_margin/main_maj$bws[1])))]
data_r[,treat:=ifelse(minority_victory_margin>0,1,0)]

out_main_maj <- lm_robust(data_r[ , turnout] ~ data_r[,treat]*data_r[,minority_victory_margin],     
                                    weights = data_r[,weight])

data_r <- turnout_majority[(abs(minority_victory_margin) <= main_maj_control$bws[1])]
data_r[,weight:= (1-abs((minority_victory_margin/main_maj_control$bws[1])))]
data_r[,treat:=ifelse(minority_victory_margin>0,1,0)]
covs <- data_r[, covars, with=FALSE]

out_main_control_maj <- lm_robust(data_r[ , turnout] ~ data_r[,treat]*data_r[,minority_victory_margin] + as.matrix(covs),     
                                            weights = data_r[,weight])

data_r <- turnout_minority[(abs(minority_victory_margin) <= main_min$bws[1])]
data_r[,weight:= (1-abs((minority_victory_margin/main_min$bws[1])))]
data_r[,treat:=ifelse(minority_victory_margin>0,1,0)]
covs <- data_r[, covars, with=FALSE]

out_main_min <- lm_robust(data_r[ , turnout] ~ data_r[,treat]*data_r[,minority_victory_margin],     
                                    weights = data_r[,weight])

data_r <- turnout_minority[(abs(minority_victory_margin) <= main_min_control$bws[1])]
data_r[,weight:= (1-abs((minority_victory_margin/main_min_control$bws[1])))]
data_r[,treat:=ifelse(minority_victory_margin>0,1,0)]
covs <- data_r[, covars, with=FALSE]

out_main_control_min <- lm_robust(data_r[ , turnout] ~ data_r[,treat]*data_r[,minority_victory_margin] + as.matrix(covs),     
                                            weights = data_r[,weight])

# compute table
data_main_table <- as.data.frame(rbind(cbind(round(main$coef[1],3), round(main_control$coef[1],3), round(main_maj$coef[1],3), round(main_maj_control$coef[1],3), round(main_min$coef[1],3), round(main_min_control$coef[1],3)),
                                 cbind(paste0('[', round(main$ci[3,1],3), ',', round(main$ci[3,2],3), ']'),
                                       paste0('[', round(main_control$ci[3,1],3), ',', round(main_control$ci[3,2],3), ']'),
                                       paste0('[', round(main_maj$ci[3,1],3), ',', round(main_maj$ci[3,2],3), ']'),
                                       paste0('[', round(main_maj_control$ci[3,1],3), ',', round(main_maj_control$ci[3,2],3), ']'),
                                       paste0('[', round(main_min$ci[3,1],3), ',', round(main_min$ci[3,2],3), ']'),
                                       paste0('[', round(main_min_control$ci[3,1],3), ',', round(main_min_control$ci[3,2],3), ']')),
                                 cbind(round(main$beta_p_l[1],3), round(main_control$beta_p_l[1],3), round(main_maj$beta_p_l[1],3), round(main_maj_control$beta_p_l[1],3), round(main_min$beta_p_l[1],3), round(main_min_control$beta_p_l[1],3)),
                                 cbind(round(out_main$r.squared,2), round(out_main_control$r.squared,2), round(out_main_maj$r.squared,2), round(out_main_control_maj$r.squared,2), round(out_main_min$r.squared,2),  round(out_main_control_min$r.squared,2)),
                                 cbind(main$N_h[1]+main$N_h[2], main_control$N_h[1]+main_control$N_h[2], main_maj$N_h[1]+main_maj$N_h[2], main_maj_control$N_h[1]+main_maj_control$N_h[2], main_min$N_h[1]+main_min$N_h[2], main_min_control$N_h[1]+main_min_control$N_h[2]),
                                 cbind(main$N[1]+main$N[2], main_control$N[1]+main_control$N[2], main_maj$N[1]+main_maj$N[2], main_maj_control$N[1]+main_maj_control$N[2], main_min$N[1]+main_min$N[2], main_min_control$N[1]+main_min_control$N[2]),
                                 cbind(round(main$bws[1],2), round(main_control$bws[1],2), round(main_maj$bws[1],2), round(main_maj_control$bws[1],2), round(main_min$bws[1],2), round(main_min_control$bws[1],2))))

# create row and column names for table
table2 <- cbind(data.frame(labels=c('RD Estimate', ' ', 'Mean control', 'R2', 'Num. eff. obs.', 'Num. obs.', 'MSE-opt. bandwidth')),
      data_main_table)
colnames(table2) <- c('', '(1)', '(2)', '(3)', '(4)', '(5)', '(6)')

# add a row at the bottom indicating whether specification inlcudes covariates
table2 <- rbind(table2, c('Controls', rep(c('N','Y'),3)))

# create footnotes for table
comment <- list(pos = list(0), command = NULL)
comment$pos[[1]] <- c(nrow(table2))
comment$command <- c(paste("\\hline\n",
                           "{\\footnotesize Notes: The dependent variable is the turnout rate in general election \\emph{t+1}. Average treatment effect at cutoff estimated with local linear regression with triangular kernel and MSE-optimal bandwidth. In brackets robust bias-corrected 95\\% CI. Appendix Table B.1 presents other relevant statistics, Table B.2 covariate coefficients.}\n", sep = ""))


# print table with selected statistics -- Table 2
print(xtable(table2,
             align = "llcccccc",       
             caption = "Ethnic Minority Representation Effects on Turnout",
             label = "table:effect_turnout_EWS_appendix"),
      digits = c(0,0,3,3,3,3,3,3),
      include.rownames=FALSE,
      caption.placement = "top",
      add.to.row = comment,
      timestamp = NULL,
      file='./output/table2.tex')

             

### Compute p-value of difference in coefficients test reported in the Results section
z_stat <- function(beta1, beta2, se1, se2) {
  (beta1 - beta2) / sqrt( (se1)^2 + (se2)^2 )
}

diff_in_coeffs <- z_stat(main_maj_control$coef[1,1], main_min_control$coef[1,1], main_maj_control$se[1,1], main_min_control$se[1,1]) 
print(2*pnorm(q=diff_in_coeffs, lower.tail=FALSE))
###



### Compute Figure 1: Ethnic Minority Representation Effect on Incumbent's Party Vote Share

# Compute RD estimate on incumbent's vote share and store optimal bandwidth size
main_vs <- rdrobust(turnout[, share_t1],
                 turnout[, minority_victory_margin],
                 kernel = "triangular", p = 1, bwselect = "mserd")

bws_vs <- main_vs$bws[1]

# Plot average incumbent's vote share as a function of the margin between the ethnic minority candidate and her/his strongest white competitor within optimal bandwidth
mainp_vs <- rdplot(turnout[(abs(minority_victory_margin) <= bws_vs),
                         share_t1],
                turnout[(abs(minority_victory_margin) <= bws_vs),
                         minority_victory_margin],
                p = 1,
                x.label = "ethnic minority victory margin, t",
                y.label= "incumbent party's vote share, t+1", title = " ",
                col.lines = "#0033FF", col.dots = '#666666')

# Print and save plot
mainp_vs$rdplot +
  theme(plot.title = element_text(size=18, lineheight=3),
        axis.text.x = element_text(size = 12),
        axis.title.x = element_text(size =14),
        axis.title.y = element_text(size =14),
        axis.text.y = element_text(size = 12))
ggsave(file = './output/figure1.pdf', width=6, height=4.85)





###########################################
## Compute Tables and Figures in Appendix
###########################################

### Compute Appendix Table B.1: Ethnic Minority Representation Effects on Turnout (includes all relevant statistics)

coef <- c(main$coef[1,1], main_control$coef[1,1], main_maj$coef[1,1], main_maj_control$coef[1,1], main_min$coef[1,1], main_min_control$coef[1,1])
se <- c(main$se[1,1], main_control$se[1,1], main_maj$se[1,1], main_maj_control$se[1,1], main_min$se[1,1], main_min_control$se[1,1])
p_value <- c(main$pv[3,1], main_control$pv[3,1], main_maj$pv[3,1], main_maj_control$pv[3,1], main_min$pv[3,1], main_min_control$pv[3,1])
cil <- c(main$ci[3,1], main_control$ci[3,1], main_maj$ci[3,1], main_maj_control$ci[3,1], main_min$ci[3,1], main_min_control$ci[3,1])
cir <- c(main$ci[3,2], main_control$ci[3,2], main_maj$ci[3,2], main_maj_control$ci[3,2], main_min$ci[3,2], main_min_control$ci[3,2])
ci <- rbind(paste0('[', round(main$ci[3,1],3), ',', round(main$ci[3,2],3), ']'),
            paste0('[', round(main_control$ci[3,1],3), ',', round(main_control$ci[3,2],3), ']'),
            paste0('[', round(main_maj$ci[3,1],3), ',', round(main_maj$ci[3,2],3), ']'),
            paste0('[', round(main_maj_control$ci[3,1],3), ',', round(main_maj_control$ci[3,2],3), ']'),
            paste0('[', round(main_min$ci[3,1],3), ',', round(main_min$ci[3,2],3), ']'),
            paste0('[', round(main_min_control$ci[3,1],3), ',', round(main_min_control$ci[3,2],3), ']'))
mean_control <- c(main$beta_p_l[1], main_control$beta_p_l[1], main_maj$beta_p_l[1], main_maj_control$beta_p_l[1], main_min$beta_p_l[1], main_min_control$beta_p_l[1])
bw <-  c(main$bws[1], main_control$bws[1], main_maj$bws[1], main_maj_control$bws[1], main_min$bws[1], main_min_control$bws[1])
nl <- c(main$N_h[1], main_control$N_h[1], main_maj$N_h[1], main_maj_control$N_h[1], main_min$N_h[1], main_min_control$N_h[1])
nr <- c(main$N_h[2], main_control$N_h[2], main_maj$N_h[2], main_maj_control$N_h[2], main_min$N_h[2], main_min_control$N_h[2])
Nl <- c(main$N[1], main_control$N[1], main_maj$N[1], main_maj_control$N[1], main_min$N[1], main_min_control$N[1])
Nr <- c(main$N[2], main_control$N[2], main_maj$N[2], main_maj_control$N[2], main_min$N[2], main_min_control$N[2])

estimates_all <- data.frame(coef=coef,se=se,p_value=p_value,ci=ci,mean_control=mean_control,
                            bw=bw, n=nl+nr, N=Nl+Nr,
                            cov = rep(c('no', 'yes'), length(coef)/2),
                            sample = c('all', 'all', 'majority-white', 'majority-white', 'plurality-minority', 'plurality-minority'))

# create footnotes for table
comment <- list(pos = list(0), command = NULL)
comment$pos[[1]] <- c(nrow(estimates_all))
comment$command <- c(paste("\\hline\n",
                           "{\\footnotesize Notes: The dependent variable is the constituency-level turnout rate in the general election \\emph{t+1}. \\emph{RD estimate} is computed with local-linear regression within a symmetric MSE-optimal bandwidth. \\emph{se} is the conventional standard error, \\emph{p-value} and \\emph{95\\% CI} are robust bias-corrected. \\emph{mean control} indicates the average turnout rate in \\emph{t+1} in constituencies where ethnic minorities barely lose in \\emph{t}. \\emph{MSE opt bw} is the MSE-optimal bandwidth of minority vote-share winning margin around the victory threshold, \\emph{eff. N} is the sample size within the MSE-optimal bandwidth and \\emph{N} is the sample size. \\emph{cov} is a vector of controls including a dummy for candidate's incumbency, candidate's party dummies, constituency share of ethnic minority population, poor, highly educated, and population density. In \\emph{sample const}, \\emph{all} includes all constituencies where ethnic minority candidates run for Parliament in \\emph{t}, \\emph{majority white} includes constituencies with an ethnic-minority population share smaller than 20\\% (the median value in our sample) and \\emph{plurality minority} with a share of at least 20\\%. Turnout data is from the Electoral Commission, ethnic background of candidates is constructed by the authors, and constituency characteristics from 2001 and 2011 UK Decennial Census.}\n", sep = ""))

# create column names for table
colnames(estimates_all) <- c('RD estimate', 'se', 'p-value', '95% CI', 'mean control', 'MSE opt bw', 'eff. N', 'N', 'cov', 'sample const')

print(xtable(estimates_all,
             align = "ccccccccccc",       
             caption = "Ethnic Minority Representation Effects on Turnout",
             label = "table:effect_turnout_all_stats_alternative_definition",
             digits = c(0,3,3,3,3,3,3,0,0,0,0)),
      include.rownames=FALSE,
      caption.placement = "top",
      add.to.row = comment,
      timestamp = NULL,
      file='./output/tableB1.tex')

### Compute Appendix Table B.2: Ethnic Minority Representation Effects on Turnout (includes covariate coefficients)

# Estimate covariate coefficients with linear regression within optimal bandwidth. Note inference is not robust bias-corrected, as rdrobust does not return covariate estimates

data_r <- turnout[(abs(minority_victory_margin) <= main_control$bws[1])]
data_r[,weight:= (1-abs((minority_victory_margin/main_control$bws[1])))]
data_r[,treat:=ifelse(minority_victory_margin>0,1,0)]
covs <- data_r[, covars, with=FALSE]

out_main_control <- estimatr::lm_robust(data_r[ , turnout] ~ data_r[,treat]*data_r[,minority_victory_margin] + as.matrix(covs),     
                                        weights = data_r[,weight])

data_r <- turnout_majority[(abs(minority_victory_margin) <= main_maj_control$bws[1])]
data_r[,weight:= (1-abs((minority_victory_margin/main_maj_control$bws[1])))]
data_r[,treat:=ifelse(minority_victory_margin>0,1,0)]
covs <- data_r[, covars, with=FALSE]

out_main_control_maj <- estimatr::lm_robust(data_r[ , turnout] ~ data_r[,treat]*data_r[,minority_victory_margin] + as.matrix(covs),     
                                            weights = data_r[,weight])

data_r <- turnout_minority[(abs(minority_victory_margin) <= main_min_control$bws[1])]
data_r[,weight:= (1-abs((minority_victory_margin/main_min_control$bws[1])))]
data_r[,treat:=ifelse(minority_victory_margin>0,1,0)]
covs <- data_r[, covars, with=FALSE]

out_main_control_min <- estimatr::lm_robust(data_r[ , turnout] ~ data_r[,treat]*data_r[,minority_victory_margin] + as.matrix(covs),     
                                            weights = data_r[,weight])

# compute table
data_main <- as.data.frame(rbind(cbind(round(main_control$coef[1],3), round(main_maj_control$coef[1],3), round(main_min_control$coef[1],3)),
                                 cbind(paste0('[', round(main_control$ci[3,1],3), ',', round(main_control$ci[3,2],3), ']'),
                                       paste0('[', round(main_maj_control$ci[3,1],3), ',', round(main_maj_control$ci[3,2],3), ']'),
                                       paste0('[', round(main_min_control$ci[3,1],3), ',', round(main_min_control$ci[3,2],3), ']')),
                                 
                                 cbind(round(unname(out_main_control$coefficients[4]),3), round(unname(out_main_control_maj$coefficients[4]),3), round(unname(out_main_control_min$coefficients[4]),3)),
                                 cbind(paste0('[', round(unname(out_main_control$conf.low[4]),3), ',', round(unname(out_main_control$conf.high[4]),3), ']'),
                                       paste0('[', round(unname(out_main_control_maj$conf.low[4]),3), ',', round(unname(out_main_control_maj$conf.high[4]),3), ']'),
                                       paste0('[', round(unname(out_main_control_min$conf.low[4]),3), ',', round(unname(out_main_control_min$conf.high[4]),3), ']')),
                                 
                                 cbind(round(unname(out_main_control$coefficients[5]),3), round(unname(out_main_control_maj$coefficients[5]),3), round(unname(out_main_control_min$coefficients[5]),3)),
                                 cbind(paste0('[', round(unname(out_main_control$conf.low[5]),3), ',', round(unname(out_main_control$conf.high[5]),3), ']'),
                                       paste0('[', round(unname(out_main_control_maj$conf.low[5]),3), ',', round(unname(out_main_control_maj$conf.high[5]),3), ']'),
                                       paste0('[', round(unname(out_main_control_min$conf.low[5]),3), ',', round(unname(out_main_control_min$conf.high[5]),3), ']')),
                                 
                                 cbind(round(unname(out_main_control$coefficients[6]),3), round(unname(out_main_control_maj$coefficients[6]),3), round(unname(out_main_control_min$coefficients[6]),3)),
                                 cbind(paste0('[', round(unname(out_main_control$conf.low[6]),3), ',', round(unname(out_main_control$conf.high[6]),3), ']'),
                                       paste0('[', round(unname(out_main_control_maj$conf.low[6]),3), ',', round(unname(out_main_control_maj$conf.high[6]),3), ']'),
                                       paste0('[', round(unname(out_main_control_min$conf.low[6]),3), ',', round(unname(out_main_control_min$conf.high[6]),3), ']')),
                                 
                                 cbind(round(unname(out_main_control$coefficients[7]),3), round(unname(out_main_control_maj$coefficients[7]),3), round(unname(out_main_control_min$coefficients[7]),3)),
                                 cbind(paste0('[', round(unname(out_main_control$conf.low[7]),3), ',', round(unname(out_main_control$conf.high[7]),3), ']'),
                                       paste0('[', round(unname(out_main_control_maj$conf.low[7]),3), ',', round(unname(out_main_control_maj$conf.high[7]),3), ']'),
                                       paste0('[', round(unname(out_main_control_min$conf.low[7]),3), ',', round(unname(out_main_control_min$conf.high[7]),3), ']')),
                                 
                                 cbind(round(unname(out_main_control$coefficients[9]),3), round(unname(out_main_control_maj$coefficients[9]),3), round(unname(out_main_control_min$coefficients[9]),3)),
                                 cbind(paste0('[', round(unname(out_main_control$conf.low[9]),3), ',', round(unname(out_main_control$conf.high[9]),3), ']'),
                                       paste0('[', round(unname(out_main_control_maj$conf.low[9]),3), ',', round(unname(out_main_control_maj$conf.high[9]),3), ']'),
                                       paste0('[', round(unname(out_main_control_min$conf.low[9]),3), ',', round(unname(out_main_control_min$conf.high[9]),3), ']')),
                          
                                 cbind(round(unname(out_main_control$coefficients[8]),3), round(unname(out_main_control_maj$coefficients[8]),3), round(unname(out_main_control_min$coefficients[8]),3)),
                                 cbind(paste0('[', round(unname(out_main_control$conf.low[8]),3), ',', round(unname(out_main_control$conf.high[8]),3), ']'),
                                       paste0('[', round(unname(out_main_control_maj$conf.low[8]),3), ',', round(unname(out_main_control_maj$conf.high[8]),3), ']'),
                                       paste0('[', round(unname(out_main_control_min$conf.low[8]),3), ',', round(unname(out_main_control_min$conf.high[8]),3), ']')),
                                 
                                 cbind(round(main_control$beta_p_l[1],3), round(main_maj_control$beta_p_l[1],3), round(main_min_control$beta_p_l[1],3)),
                                 cbind(round(out_main_control$r.squared,2), round(out_main_control_maj$r.squared,2), round(out_main_control_min$r.squared,2)),
                                 cbind(main_control$N_h[1]+main_control$N_h[2], main_maj_control$N_h[1]+main_maj_control$N_h[2], main_min_control$N_h[1]+main_min_control$N_h[2]),
                                 cbind(main_control$N[1]+main_control$N[2], main_maj_control$N[1]+main_maj_control$N[2], main_min_control$N[1]+main_min_control$N[2]),
                                 cbind(round(main_control$bws[1],2), round(main_maj_control$bws[1],2), round(main_min_control$bws[1],2))))

# add row names to table
tableb2 <- cbind(data.frame(labels=c('RD estimate', ' ',
                                     '% ethnic min', ' ',
                                     '% econ grade >=C', ' ',
                                     '% low deprivation', ' ',
                                     'pop density', ' ',
                                     'incumbent', ' ',
                                     'Labour', ' ',
                                     'Mean control', 'R2', 'Num. eff. obs.', 'Num. obs.', 'MSE-opt bandwidth')), data_main)
# add column names to table
colnames(tableb2) <- c('', '(1)', '(2)', '(3)')

# create footnotes for table
comment <- list(pos = list(0), command = NULL)
comment$pos[[1]] <- c(nrow(tableb2))
comment$command <- c(paste("\\hline\n",
                           "{\\footnotesize Notes: The dependent variable is the turnout rate in general election \\emph{t+1}. Average treatment effect at cutoff estimated with local linear regression with triangular kernel and MSE-optimal bandwidth. In brackets robust bias-corrected 95\\% CI for the RD coefficient and conventional heteroskedasticity-consistent 95\\% CI for the covariate coefficients.}\n", sep = ""))


# print table with covariate coefficients  -- Table B.2
print(xtable(tableb2,
             align = "llccc",       
             caption = "Ethnic Minority Representation Effects on Turnout",
             label = "table:effect_turnout_w_covariates"),
      digits = c(0,0,3,3,3),
      include.rownames=FALSE,
      caption.placement = "top",
      add.to.row = comment,
      timestamp = NULL,
      file='./output/tableB2.tex')


### Compute Appendix Table B.3: Ethnic Minority Representation Effects on the Electorate Size

## Compute RD estimates
main <- rdrobust(turnout[, electorate],
                 turnout[, minority_victory_margin],
                 p = 1,
                 kernel = "triangular",
                 bwselect = "mserd")

main_control <- rdrobust(turnout[, electorate],
                         turnout[, minority_victory_margin],
                         covs = turnout[, covars, with = FALSE],
                         p = 1,
                         kernel = "triangular",
                         bwselect = "mserd")

# Compute RD estimates with majority-white constituencies
main_maj <- rdrobust(turnout_majority[, electorate],
                     turnout_majority[, minority_victory_margin],
                     p = 1,
                     kernel = "triangular",
                     bwselect = "mserd")

main_maj_control <- rdrobust(turnout_majority[, electorate],
                             turnout_majority[, minority_victory_margin],
                             covs = turnout_majority[, covars, with = FALSE],
                             p = 1,
                             kernel = "triangular",
                             bwselect = "mserd")

# Compute RD estimates with majority-minority constituencies
main_min <- rdrobust(turnout_minority[, electorate],
                     turnout_minority[, minority_victory_margin],
                     p = 1,
                     kernel = "triangular",
                     bwselect = "mserd")

main_min_control <- rdrobust(turnout_minority[, electorate],
                             turnout_minority[, minority_victory_margin],
                             covs = turnout_minority[, covars, with = FALSE],
                             p = 1,
                             kernel = "triangular",
                             bwselect = "mserd")

# Compute table
coef <- c(main$coef[1,1], main_control$coef[1,1], main_maj$coef[1,1], main_maj_control$coef[1,1], main_min$coef[1,1], main_min_control$coef[1,1])
se <- c(main$se[1,1], main_control$se[1,1], main_maj$se[1,1], main_maj_control$se[1,1], main_min$se[1,1], main_min_control$se[1,1])
p_value <- c(main$pv[3,1], main_control$pv[3,1], main_maj$pv[3,1], main_maj_control$pv[3,1], main_min$pv[3,1], main_min_control$pv[3,1])
ci <- rbind(paste0('[', round(main$ci[3,1],3), ',', round(main$ci[3,2],3), ']'),
            paste0('[', round(main_control$ci[3,1],3), ',', round(main_control$ci[3,2],3), ']'),
            paste0('[', round(main_maj$ci[3,1],3), ',', round(main_maj$ci[3,2],3), ']'),
            paste0('[', round(main_maj_control$ci[3,1],3), ',', round(main_maj_control$ci[3,2],3), ']'),
            paste0('[', round(main_min$ci[3,1],3), ',', round(main_min$ci[3,2],3), ']'),
            paste0('[', round(main_min_control$ci[3,1],3), ',', round(main_min_control$ci[3,2],3), ']'))
mean_control <- c(main$beta_p_l[1], main_control$beta_p_l[1], main_maj$beta_p_l[1], main_maj_control$beta_p_l[1], main_min$beta_p_l[1], main_min_control$beta_p_l[1])
bw <-  c(main$bws[1], main_control$bws[1], main_maj$bws[1], main_maj_control$bws[1], main_min$bws[1], main_min_control$bws[1])
nl <- c(main$N_h[1], main_control$N_h[1], main_maj$N_h[1], main_maj_control$N_h[1], main_min$N_h[1], main_min_control$N_h[1])
nr <- c(main$N_h[2], main_control$N_h[2], main_maj$N_h[2], main_maj_control$N_h[2], main_min$N_h[2], main_min_control$N_h[2])
Nl <- c(main$N[1], main_control$N[1], main_maj$N[1], main_maj_control$N[1], main_min$N[1], main_min_control$N[1])
Nr <- c(main$N[2], main_control$N[2], main_maj$N[2], main_maj_control$N[2], main_min$N[2], main_min_control$N[2])

estimates_all <- data.frame(coef=coef,se=se,p_value=p_value,ci=ci,mean_control=mean_control,
                            bw=bw, n=nl+nr, N=Nl+Nr,
                            cov = rep(c('no', 'yes'), length(coef)/2),
                            sample = c('all', 'all', 'majority-white', 'majority-white', 'plurality-minority', 'plurality-minority'))

# create footnotes for table
comment <- list(pos = list(0), command = NULL)
comment$pos[[1]] <- c(nrow(estimates_all))
comment$command <- c(paste("\\hline\n",
                           "{\\footnotesize Notes: The dependent variable is the number of registered voters in a constituency in general election \\emph{t+1}. \\emph{RD estimate} is computed with local-linear regression within a symmetric MSE-optimal bandwidth. \\emph{se} is the conventional standard error, \\emph{p-value} and \\emph{95\\% CI} are robust bias-corrected. \\emph{mean control} indicates the average number of registered voters at \\emph{t+1} in constituencies where ethnic minorities barely lose in \\emph{t}. \\emph{MSE opt bw} is the MSE-optimal bandwidth of minority vote-share winning margin around the victory threshold, \\emph{eff. N} is the sample size within the MSE-optimal bandwidth and \\emph{N} is the sample size. \\emph{cov} is a vector of controls including a dummy for candidate's incumbency, candidate's party dummies, constituency share of ethnic minority population, poor, and population density. In \\emph{sample const}, \\emph{all} includes all constituencies where ethnic minority candidates run for Parliament in \\emph{t}, \\emph{majority white} includes constituencies with an ethnic-minority population share smaller than 20\\% (the median value in our sample) and \\emph{plurality minority} with a share of at least 20\\%. Electorate size data is from the Electoral Commission, ethnic background of candidates is constructed by the authors, and constituency characteristics from 2001 and 2011 UK Decennial Census. }\n", sep = ""))

# create column names for table
colnames(estimates_all) <- c('RD estimate', 'se', 'p-value', '95% CI', 'mean control', 'MSE opt bw', 'eff. N', 'N', 'cov', 'sample const')

print(xtable(estimates_all,
             align = "ccccccccccc",       
             caption = "Ethnic Minority Representation Effects on Electorate Size",
             label = "table:effect_electorate_size",
             digits = c(0,3,3,3,3,3,3,0,0,0,0)),
      include.rownames=FALSE,
      caption.placement = "top",
      add.to.row = comment,
      timestamp = NULL,
      file='./output/tableB3.tex')


### Compute Appendix Table B.4: Ethnic Minority Representation Effects on the Electorate Size (using bandwidth from main analysis on turnout--Table 2)

# Compute RD estimates
main <- rdrobust(turnout[, electorate],
                 turnout[, minority_victory_margin],
                 p = 1,
                 kernel = "triangular",
                 h = 21.5)

main_control <- rdrobust(turnout[, electorate],
                         turnout[, minority_victory_margin],
                         covs = turnout[, covars, with = FALSE],
                         p = 1,
                         kernel = "triangular",
                         h = 21.5)

# Compute RD estimates with majority-white constituencies
main_maj <- rdrobust(turnout_majority[, electorate],
                     turnout_majority[, minority_victory_margin],
                     p = 1,
                     kernel = "triangular",
                     h = 23)

main_maj_control <- rdrobust(turnout_majority[, electorate],
                             turnout_majority[, minority_victory_margin],
                             covs = turnout_majority[, covars, with = FALSE],
                             p = 1,
                             kernel = "triangular",
                             h = 18)

# Compute RD estimates with majority-minority constituencies
main_min <- rdrobust(turnout_minority[, electorate],
                     turnout_minority[, minority_victory_margin],
                     p = 1,
                     kernel = "triangular",
                     h = 21)

main_min_control <- rdrobust(turnout_minority[, electorate],
                             turnout_minority[, minority_victory_margin],
                             covs = turnout_minority[, covars, with = FALSE],
                             p = 1,
                             kernel = "triangular",
                             h = 22)

# Compute table
coef <- c(main$coef[1,1], main_control$coef[1,1], main_maj$coef[1,1], main_maj_control$coef[1,1], main_min$coef[1,1], main_min_control$coef[1,1])
se <- c(main$se[1,1], main_control$se[1,1], main_maj$se[1,1], main_maj_control$se[1,1], main_min$se[1,1], main_min_control$se[1,1])
p_value <- c(main$pv[3,1], main_control$pv[3,1], main_maj$pv[3,1], main_maj_control$pv[3,1], main_min$pv[3,1], main_min_control$pv[3,1])
ci <- rbind(paste0('[', round(main$ci[3,1],3), ',', round(main$ci[3,2],3), ']'),
            paste0('[', round(main_control$ci[3,1],3), ',', round(main_control$ci[3,2],3), ']'),
            paste0('[', round(main_maj$ci[3,1],3), ',', round(main_maj$ci[3,2],3), ']'),
            paste0('[', round(main_maj_control$ci[3,1],3), ',', round(main_maj_control$ci[3,2],3), ']'),
            paste0('[', round(main_min$ci[3,1],3), ',', round(main_min$ci[3,2],3), ']'),
            paste0('[', round(main_min_control$ci[3,1],3), ',', round(main_min_control$ci[3,2],3), ']'))
mean_control <- c(main$beta_p_l[1], main_control$beta_p_l[1], main_maj$beta_p_l[1], main_maj_control$beta_p_l[1], main_min$beta_p_l[1], main_min_control$beta_p_l[1])
bw <-  c(main$bws[1], main_control$bws[1], main_maj$bws[1], main_maj_control$bws[1], main_min$bws[1], main_min_control$bws[1])
nl <- c(main$N_h[1], main_control$N_h[1], main_maj$N_h[1], main_maj_control$N_h[1], main_min$N_h[1], main_min_control$N_h[1])
nr <- c(main$N_h[2], main_control$N_h[2], main_maj$N_h[2], main_maj_control$N_h[2], main_min$N_h[2], main_min_control$N_h[2])
Nl <- c(main$N[1], main_control$N[1], main_maj$N[1], main_maj_control$N[1], main_min$N[1], main_min_control$N[1])
Nr <- c(main$N[2], main_control$N[2], main_maj$N[2], main_maj_control$N[2], main_min$N[2], main_min_control$N[2])

estimates_all <- data.frame(coef=coef,se=se,p_value=p_value,ci=ci,mean_control=mean_control,
                            bw=bw, n=nl+nr, N=Nl+Nr,
                            cov = rep(c('no', 'yes'), length(coef)/2),
                            sample = c('all', 'all', 'majority-white', 'majority-white', 'plurality-minority', 'plurality-minority'))

# create footnotes for table
comment <- list(pos = list(0), command = NULL)
comment$pos[[1]] <- c(nrow(estimates_all))
comment$command <- c(paste("\\hline\n",
                           "{\\footnotesize Notes: The dependent variable is the number of registered voters in a constituency in general election \\emph{t+1}. \\emph{RD estimate} is computed with local-linear regression within a symmetric MSE-optimal bandwidth. \\emph{se} is the conventional standard error, \\emph{p-value} and \\emph{95\\% CI} are robust bias-corrected. \\emph{mean control} indicates the average number of registered voters at \\emph{t+1} in constituencies where ethnic minorities barely lose in \\emph{t}. \\emph{opt bw} is the bandwidth of minority vote-share winning margin around the victory threshold used in the main turnout analysis, \\emph{eff. N} is the sample size within the MSE-optimal bandwidth and \\emph{N} is the sample size. \\emph{cov} is a vector of controls including a dummy for candidate's incumbency, candidate's party dummies, constituency share of ethnic minority population, poor, and population density. In \\emph{sample const}, \\emph{all} includes all constituencies where ethnic minority candidates run for Parliament in \\emph{t}, \\emph{majority white} includes constituencies with an ethnic-minority population share smaller than 20\\% (the median value in our sample) and \\emph{plurality minority} with a share of at least 20\\%. Electorate size data is from the Electoral Commission, ethnic background of candidates is constructed by the authors, and constituency characteristics from 2001 and 2011 UK Decennial Census.}\n", sep = ""))

# create column names for table
colnames(estimates_all) <- c('RD estimate', 'se', 'p-value', '95% CI', 'mean control', 'MSE opt bw', 'eff. N', 'N', 'cov', 'sample const')

print(xtable(estimates_all,
             align = "ccccccccccc",       
             caption = "Ethnic Minority Representation Effects on Electorate Size",
             label = "table:effect_electorate_size_bw_turnout",
             digits = c(0,3,3,3,3,3,3,0,0,0,0)),
      include.rownames=FALSE,
      caption.placement = "top",
      add.to.row = comment,
      timestamp = NULL,
      file='./output/tableB4.tex')


### Compute Appendix Table B.5: Ethnic Minority Representation Effects on Turnout (using alternative definition of majority-white/majority-minority constituencies based on sample of British Election Study)

# Compute RD estimates with all constituencies
main <- rdrobust(turnout[, turnout],
                 turnout[, minority_victory_margin],
                 p = 1,
                 kernel = "triangular",
                 bwselect = "mserd")


main_control <- rdrobust(turnout[, turnout],
                         turnout[, minority_victory_margin],
                         covs = turnout[, covars, with = FALSE],
                         p = 1,
                         kernel = "triangular",
                         bwselect = "mserd")

# define majority-minority/majority-white constituencies based on the sample of the British Election Study (BES). Specifically, majority-white includes constituencies with only white respondents in the BES and majority-minority with BAME respondents. 
turnout_majority <- turnout[cluster_id_bes_white==1]
turnout_minority <- turnout[cluster_id_bes_white==0]

# Compute RD estimates with majority-white constituencies
main_maj <- rdrobust(turnout_majority[, turnout],
                     turnout_majority[, minority_victory_margin],
                     p = 1,
                     kernel = "triangular",
                     bwselect = "mserd")

main_maj_control <- rdrobust(turnout_majority[, turnout],
                             turnout_majority[, minority_victory_margin],
                             covs = turnout_majority[, covars, with = FALSE],
                             p = 1,
                             kernel = "triangular",
                             bwselect = "mserd")

# Compute RD estimates with majority-minority constituencies
main_min <- rdrobust(turnout_minority[, turnout],
                     turnout_minority[, minority_victory_margin],
                     p = 1,
                     kernel = "triangular",
                     bwselect = "mserd")

main_min_control <- rdrobust(turnout_minority[, turnout],
                             turnout_minority[, minority_victory_margin],
                             covs = turnout_minority[, covars, with = FALSE],
                             p = 1,
                             kernel = "triangular",
                             bwselect = "mserd")

# Compute table
coef <- c(main$coef[1,1], main_control$coef[1,1], main_maj$coef[1,1], main_maj_control$coef[1,1], main_min$coef[1,1], main_min_control$coef[1,1])
se <- c(main$se[1,1], main_control$se[1,1], main_maj$se[1,1], main_maj_control$se[1,1], main_min$se[1,1], main_min_control$se[1,1])
p_value <- c(main$pv[3,1], main_control$pv[3,1], main_maj$pv[3,1], main_maj_control$pv[3,1], main_min$pv[3,1], main_min_control$pv[3,1])
ci <- rbind(paste0('[', round(main$ci[3,1],3), ',', round(main$ci[3,2],3), ']'),
            paste0('[', round(main_control$ci[3,1],3), ',', round(main_control$ci[3,2],3), ']'),
            paste0('[', round(main_maj$ci[3,1],3), ',', round(main_maj$ci[3,2],3), ']'),
            paste0('[', round(main_maj_control$ci[3,1],3), ',', round(main_maj_control$ci[3,2],3), ']'),
            paste0('[', round(main_min$ci[3,1],3), ',', round(main_min$ci[3,2],3), ']'),
            paste0('[', round(main_min_control$ci[3,1],3), ',', round(main_min_control$ci[3,2],3), ']'))
mean_control <- c(main$beta_p_l[1], main_control$beta_p_l[1], main_maj$beta_p_l[1], main_maj_control$beta_p_l[1], main_min$beta_p_l[1], main_min_control$beta_p_l[1])
bw <-  c(main$bws[1], main_control$bws[1], main_maj$bws[1], main_maj_control$bws[1], main_min$bws[1], main_min_control$bws[1])
nl <- c(main$N_h[1], main_control$N_h[1], main_maj$N_h[1], main_maj_control$N_h[1], main_min$N_h[1], main_min_control$N_h[1])
nr <- c(main$N_h[2], main_control$N_h[2], main_maj$N_h[2], main_maj_control$N_h[2], main_min$N_h[2], main_min_control$N_h[2])
Nl <- c(main$N[1], main_control$N[1], main_maj$N[1], main_maj_control$N[1], main_min$N[1], main_min_control$N[1])
Nr <- c(main$N[2], main_control$N[2], main_maj$N[2], main_maj_control$N[2], main_min$N[2], main_min_control$N[2])

estimates_all <- data.frame(coef=coef,se=se,p_value=p_value,ci=ci,mean_control=mean_control,
                            bw=bw, n=nl+nr, N=Nl+Nr,
                            cov = rep(c('no', 'yes'), length(coef)/2),
                            sample = c('all', 'all', 'majority-white', 'majority-white', 'plurality-minority', 'plurality-minority'))

# create footnotes for table
comment <- list(pos = list(0), command = NULL)
comment$pos[[1]] <- c(nrow(estimates_all))
comment$command <- c(paste("\\hline\n",
                           "{\\footnotesize Notes: The dependent variable is the constituency-level turnout rate in the general election \\emph{t+1}. \\emph{RD estimate} is computed with local-linear regression within a symmetric MSE-optimal bandwidth. \\emph{se} is the conventional standard error, \\emph{p-value} and \\emph{95\\% CI} are robust bias-corrected. \\emph{mean control} indicates the average turnout rate in \\emph{t+1} in constituencies where ethnic minorities barely lose in \\emph{t}. \\emph{MSE opt bw} is the MSE-optimal bandwidth of minority vote-share winning margin around the victory threshold, \\emph{eff. N} is the sample size within the MSE-optimal bandwidth and \\emph{N} is the sample size. \\emph{cov} is a vector of controls including a dummy for candidate's incumbency, candidate's party dummies, constituency share of ethnic minority population, poor, highly educated, and population density. In \\emph{sample const}, \\emph{all} includes all constituencies where ethnic minority candidates run for Parliament in \\emph{t}, \\emph{majority white} includes constituencies with only white respondents in The British Election Study and \\emph{plurality minority} with BAME respondents. Turnout data is from the Electoral Commission, ethnic background of candidates is constructed by the authors, and constituency characteristics from 2001 and 2011 UK Decennial Census.}\n", sep = ""))

# create column names for table
colnames(estimates_all) <- c('RD estimate', 'se', 'p-value', '95% CI', 'mean control', 'MSE opt bw', 'eff. N', 'N', 'cov', 'sample const')

print(xtable(estimates_all,
             align = "ccccccccccc",       
             caption = "Ethnic Minority Representation Effects on Turnout",
             label = "table:effect_turnout_all_stats",
             digits = c(0,3,3,3,3,3,3,0,0,0,0)),
      include.rownames=FALSE,
      caption.placement = "top",
      add.to.row = comment,
      timestamp = NULL,
      file='./output/tableB5.tex')


### Compute Table B.6: Ethnic Minority Representation Effects on Turnout (including data from the 2005 General Election)

# append 2005 election data
data_2005 <- fread('./data/clean/data_2005_analysis.csv')
turnout <- rbind(turnout, data_2005, fill=TRUE)

# define majority-minority, majority-white based on share of ethnic minority population
turnout_majority <- turnout[ethnicity_minority<20]
turnout_minority <- turnout[ethnicity_minority>=20]

# define (smaller) vector of covariates (as social grade and deprivation are not available in 2005 data)
covars <- c('ethnicity_minority', 'density', 'candidates_Labour', 'incumbent')

# compute RD estimates with all constituencies
main <- rdrobust(turnout[, turnout],
                 turnout[, minority_victory_margin],
                 p = 1,
                 kernel = "triangular",
                 bwselect = "mserd")

main_control <- rdrobust(turnout[, turnout],
                         turnout[, minority_victory_margin],
                         covs = turnout[, covars, with = FALSE],
                         p = 1,
                         kernel = "triangular",
                         bwselect = "mserd")

# compute RD estimates with majority-white constituencies
main_maj <- rdrobust(turnout_majority[, turnout],
                     turnout_majority[, minority_victory_margin],
                     p = 1,
                     kernel = "triangular",
                     bwselect = "mserd")

main_maj_control <- rdrobust(turnout_majority[, turnout],
                             turnout_majority[, minority_victory_margin],
                             covs = turnout_majority[, covars, with = FALSE],
                             p = 1,
                             kernel = "triangular",
                             bwselect = "mserd")

# compute RD estimates with majority-minority constituencies
main_min <- rdrobust(turnout_minority[, turnout],
                     turnout_minority[, minority_victory_margin],
                     p = 1,
                     kernel = "triangular",
                     bwselect = "mserd")

main_min_control <- rdrobust(turnout_minority[, turnout],
                             turnout_minority[, minority_victory_margin],
                             covs = turnout_minority[, covars, with = FALSE],
                             p = 1,
                             kernel = "triangular",
                             bwselect = "mserd")


# compute r-squared of models and covariate coefficients to include in table -- covariate coefficients are estimated with linear regression within optimal bandwidth. Note inference is not robust bias-corrected, as rdrobust does not return covariate estimates

data_r <- turnout[(abs(minority_victory_margin) <= main$bws[1])]
data_r[,weight:= (1-abs((minority_victory_margin/main$bws[1])))]
data_r[,treat:=ifelse(minority_victory_margin>0,1,0)]

out_main <- estimatr::lm_robust(data_r[ , turnout] ~ data_r[,treat]*data_r[,minority_victory_margin],     
                                weights = data_r[,weight])


data_r <- turnout[(abs(minority_victory_margin) <= main_control$bws[1])]
data_r[,weight:= (1-abs((minority_victory_margin/main_control$bws[1])))]
data_r[,treat:=ifelse(minority_victory_margin>0,1,0)]
covs <- data_r[, covars, with=FALSE]

out_main_control <- estimatr::lm_robust(data_r[ , turnout] ~ data_r[,treat]*data_r[,minority_victory_margin] + as.matrix(covs),     
                                        weights = data_r[,weight])

data_r <- turnout_majority[(abs(minority_victory_margin) <= main_maj$bws[1])]
data_r[,weight:= (1-abs((minority_victory_margin/main_maj$bws[1])))]
data_r[,treat:=ifelse(minority_victory_margin>0,1,0)]

out_main_maj <- estimatr::lm_robust(data_r[ , turnout] ~ data_r[,treat]*data_r[,minority_victory_margin],     
                                    weights = data_r[,weight])

data_r <- turnout_majority[(abs(minority_victory_margin) <= main_maj_control$bws[1])]
data_r[,weight:= (1-abs((minority_victory_margin/main_maj_control$bws[1])))]
data_r[,treat:=ifelse(minority_victory_margin>0,1,0)]
covs <- data_r[, covars, with=FALSE]

out_main_control_maj <- estimatr::lm_robust(data_r[ , turnout] ~ data_r[,treat]*data_r[,minority_victory_margin] + as.matrix(covs),     
                                            weights = data_r[,weight])

data_r <- turnout_minority[(abs(minority_victory_margin) <= main_min$bws[1])]
data_r[,weight:= (1-abs((minority_victory_margin/main_min$bws[1])))]
data_r[,treat:=ifelse(minority_victory_margin>0,1,0)]
covs <- data_r[, covars, with=FALSE]

out_main_min <- estimatr::lm_robust(data_r[ , turnout] ~ data_r[,treat]*data_r[,minority_victory_margin],     
                                    weights = data_r[,weight])

data_r <- turnout_minority[(abs(minority_victory_margin) <= main_min_control$bws[1])]
data_r[,weight:= (1-abs((minority_victory_margin/main_min_control$bws[1])))]
data_r[,treat:=ifelse(minority_victory_margin>0,1,0)]
covs <- data_r[, covars, with=FALSE]

out_main_control_min <- estimatr::lm_robust(data_r[ , turnout] ~ data_r[,treat]*data_r[,minority_victory_margin] + as.matrix(covs),     
                                            weights = data_r[,weight])
# Compute table
data_main <- as.data.frame(rbind(cbind(round(main$coef[1],3), round(main_control$coef[1],3), round(main_maj$coef[1],3), round(main_maj_control$coef[1],3), round(main_min$coef[1],3), round(main_min_control$coef[1],3)),
                    cbind(paste0('[', round(main$ci[3,1],3), ',', round(main$ci[3,2],3), ']'),
                          paste0('[', round(main_control$ci[3,1],3), ',', round(main_control$ci[3,2],3), ']'),
                          paste0('[', round(main_maj$ci[3,1],3), ',', round(main_maj$ci[3,2],3), ']'),
                          paste0('[', round(main_maj_control$ci[3,1],3), ',', round(main_maj_control$ci[3,2],3), ']'),
                          paste0('[', round(main_min$ci[3,1],3), ',', round(main_min$ci[3,2],3), ']'),
                          paste0('[', round(main_min_control$ci[3,1],3), ',', round(main_min_control$ci[3,2],3), ']')),
                    
                    cbind('',round(unname(out_main_control$coefficients[4]),3), '',round(unname(out_main_control_maj$coefficients[4]),3), '', round(unname(out_main_control_min$coefficients[4]),3)),
                    cbind('',
                          paste0('[', round(unname(out_main_control$conf.low[4]),3), ',', round(unname(out_main_control$conf.high[4]),3), ']'),
                          '',
                          paste0('[', round(unname(out_main_control_maj$conf.low[4]),3), ',', round(unname(out_main_control_maj$conf.high[4]),3), ']'),
                          '',
                          paste0('[', round(unname(out_main_control_min$conf.low[4]),3), ',', round(unname(out_main_control_min$conf.high[4]),3), ']')),
                    
                    cbind('',round(unname(out_main_control$coefficients[5]),3),'',round(unname(out_main_control_maj$coefficients[5]),3), '', round(unname(out_main_control_min$coefficients[5]),3)),
                    cbind('',
                          paste0('[', round(unname(out_main_control$conf.low[5]),3), ',', round(unname(out_main_control$conf.high[5]),3), ']'),
                          '',
                          paste0('[', round(unname(out_main_control_maj$conf.low[5]),3), ',', round(unname(out_main_control_maj$conf.high[5]),3), ']'),
                          '',
                    paste0('[', round(unname(out_main_control_min$conf.low[5]),3), ',', round(unname(out_main_control_min$conf.high[5]),3), ']')),

                    cbind('',round(unname(out_main_control$coefficients[6]),3), '',round(unname(out_main_control_maj$coefficients[6]),3),'', round(unname(out_main_control_min$coefficients[6]),3)),
                    cbind('',
                          paste0('[', round(unname(out_main_control$conf.low[6]),3), ',', round(unname(out_main_control$conf.high[6]),3), ']'),
                          '',
                          paste0('[', round(unname(out_main_control_maj$conf.low[6]),3), ',', round(unname(out_main_control_maj$conf.high[6]),3), ']'),
                          '',
                    paste0('[', round(unname(out_main_control_min$conf.low[6]),3), ',', round(unname(out_main_control_min$conf.high[6]),3), ']')),

                    cbind('',round(unname(out_main_control$coefficients[7]),3),'',round(unname(out_main_control_maj$coefficients[7]),3), '', round(unname(out_main_control_min$coefficients[7]),3)),
                    cbind('',
                          paste0('[', round(unname(out_main_control$conf.low[7]),3), ',', round(unname(out_main_control$conf.high[7]),3), ']'),
                          '',
                          paste0('[', round(unname(out_main_control_maj$conf.low[7]),3), ',', round(unname(out_main_control_maj$conf.high[7]),3), ']'),
                          '',
                    paste0('[', round(unname(out_main_control_min$conf.low[7]),3), ',', round(unname(out_main_control_min$conf.high[7]),3), ']')),

                    cbind(round(main$beta_p_l[1],3), round(main_control$beta_p_l[1],3), round(main_maj$beta_p_l[1],3), round(main_maj_control$beta_p_l[1],3), round(main_min$beta_p_l[1],3), round(main_min_control$beta_p_l[1],3)),
                    cbind(round(out_main$r.squared,2), round(out_main_control$r.squared,2), round(out_main_maj$r.squared,2), round(out_main_control_maj$r.squared,2), round(out_main_min$r.squared,2),  round(out_main_control_min$r.squared,2)),
                    cbind(main$N_h[1]+main$N_h[2], main_control$N_h[1]+main_control$N_h[2], main_maj$N_h[1]+main_maj$N_h[2], main_maj_control$N_h[1]+main_maj_control$N_h[2], main_min$N_h[1]+main_min$N_h[2], main_min_control$N_h[1]+main_min_control$N_h[2]),
                    cbind(main$N[1]+main$N[2], main_control$N[1]+main_control$N[2], main_maj$N[1]+main_maj$N[2], main_maj_control$N[1]+main_maj_control$N[2], main_min$N[1]+main_min$N[2], main_min_control$N[1]+main_min_control$N[2]),
                    cbind(round(main$bws[1],2), round(main_control$bws[1],2), round(main_maj$bws[1],2), round(main_maj_control$bws[1],2), round(main_min$bws[1],2), round(main_min_control$bws[1],2))))

# add row names to table
tableb6 <- cbind(data.frame(labels=c('RD estimate', ' ',
                                     '% ethnic min', ' ',
                                     'pop density', ' ',
                                     'Labour', ' ',
                                     'incumbent', ' ',
                                     'Mean control', 'R2', 'Num. eff. obs.', 'Num. obs.', 'MSE-opt bandwidth')), data_main)

# add a row at the bottom indicating whether specification includes covariates
tableb6 <- rbind(tableb6, c('Controls', rep(c('N','Y'),3)))

# add column names to table
colnames(tableb6) <- c('', '(1)', '(2)', '(3)', '(4)', '(5)', '(6)')

# create footnotes for table
comment <- list(pos = list(0), command = NULL)
comment$pos[[1]] <- c(nrow(tableb6))
comment$command <- c(paste("\\hline\n",
                           "{\\footnotesize Notes: The dependent variable is the turnout rate in general election \\emph{t+1}. Average treatment effect at cutoff estimated with local linear regression with triangular kernel and MSE-optimal bandwidth. In brackets robust bias-corrected 95\\% CI for the RD coefficient and conventional heteroskedasticity-consistent 95\\% CI for the covariate coefficients.}\n", sep = ""))

# print table
print(xtable(tableb6,
             align = "llcccccc",       
             caption = "Ethnic Minority Representation Effects on Turnout",
             label = "table:effect_turnout_2005"),
      digits = c(0,0,3,3,3,3,3,3),
      include.rownames=FALSE,
      caption.placement = "top",
      add.to.row = comment,
      timestamp = NULL,
      file='./output/tableB6.tex')


## Compute p-value of difference in coefficients test reported in Appendix B.8
diff_in_coeffs <- z_stat(main_maj_control$coef[1,1], main_min_control$coef[1,1], main_maj_control$se[1,1], main_min_control$se[1,1]) 
print(2*pnorm(q=diff_in_coeffs, lower.tail=FALSE))


### Compute Table C.1: Ethnic Minority Victory Effects on Turnout (using The British Election Study data)
bes_all <- fread(file = './data/clean/data_bes_analysis.csv')

# define vector of covariates (including individual-level covariates)
covars_bes <- c('Age',
                'single',
                'employed',
                'own_house',
                'low_income',
                'male',
                'past_turnout_ge',
                'candidates_Labour',
                'incumbent')

# subset to sample of white voters in England, Scotland and Wales
bes <- bes_all[white==1]
bes <- bes[grep('^(E|W|S)', ons_id)]

# define sample with non-missing covariates
bes_complete <- bes[complete.cases(bes[, covars_bes, with = FALSE])]

## Compute RD estimates
# note rdrobust package warning can be ignored: we're clustering individual observations by constituency-election year
main_full <- rdrobust(bes[, turnout_ge],
                      bes[, minority_victory_margin],
                      p = 1,
                      kernel = "triangular",
                      bwselect = "mserd",
                      cluster = bes[, cluster])

main <- rdrobust(bes_complete[, turnout_ge],
                 bes_complete[, minority_victory_margin],
                 p = 1,
                 kernel = "triangular",
                 bwselect = "mserd",
                 cluster = bes_complete[, cluster])

main_control <- rdrobust(bes_complete[, turnout_ge],
                         bes_complete[, minority_victory_margin],
                         covs = bes_complete[, covars_bes, with = FALSE],
                         p = 1,
                         kernel = "triangular",
                         bwselect = "mserd",
                         cluster = bes_complete[, cluster])

# compute r-squared of models and covariate coefficients to include in table -- covariate coefficients are estimated with linear regression within optimal bandwidth. Note inference is not robust bias-corrected, as rdrobust does not return covariate estimates
data_r <- bes[(abs(minority_victory_margin) <= main_full$bws[1])]
data_r[,weight:= (1-abs((minority_victory_margin/main_full$bws[1])))]
data_r[,treat:=ifelse(minority_victory_margin>0,1,0)]
out_main_full <- estimatr::lm_robust(data_r[ , turnout_ge] ~ data_r[,treat]*data_r[,minority_victory_margin],
                                     clusters = data_r[,cluster],
                                     weights = data_r[,weight])

data_r <- bes_complete[(abs(minority_victory_margin) <= main$bws[1])]
data_r[,weight:= (1-abs((minority_victory_margin/main$bws[1])))]
data_r[,treat:=ifelse(minority_victory_margin>0,1,0)]
out_main <- estimatr::lm_robust(data_r[ , turnout_ge] ~ data_r[,treat]*data_r[,minority_victory_margin],
                                clusters = data_r[,cluster],
                                weights = data_r[,weight])

data_r <- bes_complete[(abs(minority_victory_margin) <= main_control$bws[1])]
data_r[,weight:= (1-abs((minority_victory_margin/main_control$bws[1])))]
data_r[,treat:=ifelse(minority_victory_margin>0,1,0)]
covs <- data_r[, covars_bes, with=FALSE]
out_main_control <- estimatr::lm_robust(data_r[ , turnout_ge] ~ data_r[,treat]*data_r[,minority_victory_margin] + as.matrix(covs),
                                        clusters = data_r[,cluster],
                                        weights = data_r[,weight])

## subset to sample of BAME voters in England, Scotland and Wales
bes_min <- bes_all[white==0]
bes_min <- bes_min[grep('^(E|W|S)', ons_id)]

# define sample with non-missing covariates
bes_complete_min <- bes_min[complete.cases(bes_min[, covars_bes, with = FALSE])]

## Compute RD estimates
main_full_min <- rdrobust(bes_min[, turnout_ge],
                          bes_min[, minority_victory_margin],
                          p = 1,
                          kernel = "triangular",
                          bwselect = "mserd",
                          cluster = bes_min[, cluster])

main_min <- rdrobust(bes_complete_min[, turnout_ge],
                     bes_complete_min[, minority_victory_margin],
                     p = 1,
                     kernel = "triangular",
                     bwselect = "mserd",
                     cluster = bes_complete_min[, cluster])

main_control_min <- rdrobust(bes_complete_min[, turnout_ge],
                             bes_complete_min[, minority_victory_margin],
                             covs = bes_complete_min[, covars_bes, with = FALSE],
                             p = 1,
                             kernel = "triangular",
                             bwselect = "mserd",
                             cluster = bes_complete_min[, cluster])

# compute r-squared of models and covariate coefficients to include in table -- covariate coefficients are estimated with linear regression within optimal bandwidth. Note inference is not robust bias-corrected, as rdrobust does not return covariate estimates
data_r <- bes_min[(abs(minority_victory_margin) <= main_full_min$bws[1])]
data_r[,weight:= (1-abs((minority_victory_margin/main_full_min$bws[1])))]
data_r[,treat:=ifelse(minority_victory_margin>0,1,0)]
out_main_full_min <- estimatr::lm_robust(data_r[ , turnout_ge] ~ data_r[,treat]*data_r[,minority_victory_margin],
                                         clusters = data_r[,cluster],
                                         weights = data_r[,weight])

data_r <- bes_complete_min[(abs(minority_victory_margin) <= main_min$bws[1])]
data_r[,weight:= (1-abs((minority_victory_margin/main_min$bws[1])))]
data_r[,treat:=ifelse(minority_victory_margin>0,1,0)]
out_main_min <- estimatr::lm_robust(data_r[ , turnout_ge] ~ data_r[,treat]*data_r[,minority_victory_margin],
                                    clusters = data_r[,cluster],
                                    weights = data_r[,weight])

data_r <- bes_complete_min[(abs(minority_victory_margin) <= main_control_min$bws[1])]
data_r[,weight:= (1-abs((minority_victory_margin/main_control_min$bws[1])))]
data_r[,treat:=ifelse(minority_victory_margin>0,1,0)]
covs <- data_r[, covars_bes, with=FALSE]
out_main_control_min <- estimatr::lm_robust(data_r[ , turnout_ge] ~ data_r[,treat]*data_r[,minority_victory_margin] + as.matrix(covs),
                                            clusters = data_r[,cluster],
                                            weights = data_r[,weight])

## Compute table
data_main <- as.data.frame(rbind(cbind(round(main_full$coef[1],3), round(main$coef[1],3), round(main_control$coef[1],3),
                                       round(main_full_min$coef[1],3), round(main_min$coef[1],3), round(main_control_min$coef[1],3)),
                                 cbind(paste0('[', round(main_full$ci[3,1],3), ',', round(main_full$ci[3,2],3), ']'),
                                       paste0('[', round(main$ci[3,1],3), ',', round(main$ci[3,2],3), ']'),
                                       paste0('[', round(main_control$ci[3,1],3), ',', round(main_control$ci[3,2],3), ']'),
                                       paste0('[', round(main_full_min$ci[3,1],3), ',', round(main_full_min$ci[3,2],3), ']'),
                                       paste0('[', round(main_min$ci[3,1],3), ',', round(main_min$ci[3,2],3), ']'),
                                       paste0('[', round(main_control_min$ci[3,1],3), ',', round(main_control_min$ci[3,2],3), ']')),
                                 cbind('', '', round(unname(out_main_control$coefficients[4]),3),
                                       '', '', round(unname(out_main_control_min$coefficients[4]),3)),
                                 cbind('',
                                       '',
                                       paste0('[', round(unname(out_main_control$conf.low[4]),3), ',', round(unname(out_main_control$conf.high[4]),3), ']'),
                                      '',
                                      '',
                                      paste0('[', round(unname(out_main_control_min$conf.low[4]),3), ',', round(unname(out_main_control_min$conf.high[4]),3), ']')),
                                 
                                 cbind('', '', round(unname(out_main_control$coefficients[5]),3),
                                       '', '', round(unname(out_main_control_min$coefficients[5]),3)),
                                 cbind('',
                                       '',
                                       paste0('[', round(unname(out_main_control$conf.low[5]),3), ',', round(unname(out_main_control$conf.high[5]),3), ']'),
                                       '',
                                       '',
                                       paste0('[', round(unname(out_main_control_min$conf.low[5]),3), ',', round(unname(out_main_control_min$conf.high[5]),3), ']')),
                                 
                                 cbind('', '', round(unname(out_main_control$coefficients[6]),3),
                                       '', '', round(unname(out_main_control_min$coefficients[6]),3)),
                                 cbind('',
                                       '',
                                       paste0('[', round(unname(out_main_control$conf.low[6]),3), ',', round(unname(out_main_control$conf.high[6]),3), ']'),
                                       '',
                                       '',
                                       paste0('[', round(unname(out_main_control_min$conf.low[6]),3), ',', round(unname(out_main_control_min$conf.high[6]),3), ']')),
                                 
                                 cbind('', '', round(unname(out_main_control$coefficients[7]),3),
                                       '', '', round(unname(out_main_control_min$coefficients[7]),3)),
                                 cbind('',
                                       '',
                                       paste0('[', round(unname(out_main_control$conf.low[7]),3), ',', round(unname(out_main_control$conf.high[7]),3), ']'),
                                       '',
                                       '',
                                       paste0('[', round(unname(out_main_control_min$conf.low[7]),3), ',', round(unname(out_main_control_min$conf.high[7]),3), ']')),
                                 cbind('', '', round(unname(out_main_control$coefficients[8]),3),
                                       '', '', round(unname(out_main_control_min$coefficients[8]),3)),
                                 cbind('',
                                       '',
                                       paste0('[', round(unname(out_main_control$conf.low[8]),3), ',', round(unname(out_main_control$conf.high[8]),3), ']'),
                                       '',
                                       '',
                                       paste0('[', round(unname(out_main_control_min$conf.low[8]),3), ',', round(unname(out_main_control_min$conf.high[8]),3), ']')),
                                 cbind('', '', round(unname(out_main_control$coefficients[9]),3),
                                       '', '', round(unname(out_main_control_min$coefficients[9]),3)),
                                 cbind('',
                                       '',
                                       paste0('[', round(unname(out_main_control$conf.low[9]),3), ',', round(unname(out_main_control$conf.high[9]),3), ']'),
                                       '',
                                       '',
                                       paste0('[', round(unname(out_main_control_min$conf.low[9]),3), ',', round(unname(out_main_control_min$conf.high[9]),3), ']')),
                                 cbind('', '', round(unname(out_main_control$coefficients[10]),3),
                                       '', '', round(unname(out_main_control_min$coefficients[10]),3)),
                                 cbind('',
                                       '',
                                       paste0('[', round(unname(out_main_control$conf.low[10]),3), ',', round(unname(out_main_control$conf.high[10]),3), ']'),
                                       '',
                                       '',
                                       paste0('[', round(unname(out_main_control_min$conf.low[10]),3), ',', round(unname(out_main_control_min$conf.high[10]),3), ']')),
                                 cbind('', '', round(unname(out_main_control$coefficients[11]),3),
                                       '', '', round(unname(out_main_control_min$coefficients[11]),3)),
                                 cbind('',
                                       '',
                                       paste0('[', round(unname(out_main_control$conf.low[11]),3), ',', round(unname(out_main_control$conf.high[11]),3), ']'),
                                       '',
                                       '',
                                       paste0('[', round(unname(out_main_control_min$conf.low[11]),3), ',', round(unname(out_main_control_min$conf.high[11]),3), ']')),
                                 cbind('', '', round(unname(out_main_control$coefficients[12]),3),
                                       '', '', round(unname(out_main_control_min$coefficients[12]),3)),
                                 cbind('',
                                       '',
                                       paste0('[', round(unname(out_main_control$conf.low[12]),3), ',', round(unname(out_main_control$conf.high[12]),3), ']'),
                                       '',
                                       '',
                                       paste0('[', round(unname(out_main_control_min$conf.low[12]),3), ',', round(unname(out_main_control_min$conf.high[12]),3), ']')),
                                 
                                 cbind(round(main_full$beta_p_l[1],3), round(main$beta_p_l[1],3), round(main_control$beta_p_l[1],3),
                                       round(main_full_min$beta_p_l[1],3), round(main_min$beta_p_l[1],3), round(main_control_min$beta_p_l[1],3)),
                                 cbind(round(out_main_full$r.squared,2), round(out_main$r.squared,2), round(out_main_control$r.squared,2),
                                       round(out_main_full_min$r.squared,2), round(out_main_min$r.squared,2), round(out_main_control_min$r.squared,2)),
                                 cbind(main_full$N_h[1]+main_full$N_h[2], main$N_h[1]+main$N_h[2], main_control$N_h[1]+main_control$N_h[2],
                                       main_full_min$N_h[1]+main_full_min$N_h[2], main_min$N_h[1]+main_min$N_h[2], main_control_min$N_h[1]+main_control_min$N_h[2]),
                                 cbind(main_full$N[1]+main_full$N[2], main$N[1]+main$N[2], main_control$N[1]+main_control$N[2],
                                       main_full_min$N[1]+main_full_min$N[2], main_min$N[1]+main_min$N[2], main_control_min$N[1]+main_control_min$N[2]),
                                 cbind(main_full$M[1]+main_full$M[2], main$M[1]+main$M[2], main_control$M[1]+main_control$M[2],
                                       main_full_min$M[1]+main_full_min$M[2], main_min$M[1]+main_min$M[2], main_control_min$M[1]+main_control_min$M[2]),
                                 cbind(round(main_full$bws[1],2), round(main$bws[1],2), round(main_control$bws[1],2),
                                       round(main_full_min$bws[1],2), round(main_min$bws[1],2), round(main_control_min$bws[1],2))))

tablec1 <- cbind(data.frame(labels=c('RD estimate', ' ',
                                     'age', ' ',
                                     'single', ' ',
                                     'employed', ' ',
                                     'house owner', ' ',
                                     'low income', ' ',
                                     'male', ' ',
                                     'voted past election', ' ',
                                     'Labour', ' ',
                                     'incumbent', ' ',
                                     'Mean control', 'R2', 'Num. eff. obs.', 'Num. obs.', 'Num. clusters', 'MSE-opt bandwidth')), data_main)

# add rows at the bottom indicating whether specification includes covariates and the sample
tablec1 <- rbind(tablec1, c('Controls', rep(c('No','No', 'Yes'),2)), c('Sample', rep(c('Full', 'Complete', 'Complete'),2)))

# add column names to table
colnames(tablec1) <- c('', '(1)', '(2)', '(3)', '(4)', '(5)', '(6)')

# create footnotes for table
comment <- list(pos = list(0), command = NULL)
comment$pos[[1]] <- c(nrow(tablec1))
comment$command <- c(paste("\\hline\n",
                           "{\\footnotesize Notes: The dependent variable is self-reported turnout in general election \\emph{t+1}. Average treatment effect at cutoff estimated with local linear regression with triangular kernel and MSE-optimal bandwidth. In brackets robust bias-corrected 95\\% CI. Standard errors are clustered by constituency-election. \\emph{Num. eff. obs.} is the sample size within the MSE-optimal bandwidth, \\emph{Num. obs} the sample size, and \\emp{Num. clusters} the number of constituency-election years in the sample. \\emph{MSE-optimal bandwidth} is the MSE-optimal bandwidth of vote-share winning margin around the victory threshold. \\emph{Controls} include respondents' age, gender, and turnout in the previous election), a constituency's share of the population that is employed and low income, and a candidate's incumbency and political party. \\emph{Sample} indicates the sample used in the analysis: \\emph{Full} stands for the full sample of respondents and \\emph{Complete} for a complete cases sample with no missing values for respondent's predetermined variables.}\n", sep = ""))

# print table
print(xtable(tablec1,
             align = "llcccccc",       
             caption = "Ethnic Minority Representation Effects on Turnout",
             label = "table:effect_turnout_bes"),
      digits = c(0,0,3,3,3,3,3,3),
      include.rownames=FALSE,
      caption.placement = "top",
      add.to.row = comment,
      timestamp = NULL,
      file='./output/tableC1.tex')


## Compute p-value of difference in coefficients test reported in Appendix C
diff_in_coeffs <- z_stat(main_control$coef[1,1], main_control_min$coef[1,1], main_control$se[1,1], main_control_min$se[1,1]) 
print(2*pnorm(q=diff_in_coeffs, lower.tail=FALSE))



### Compute Table D.1: Ethnic Minority Representation Effects on Vote Shares, Incumbent's Probability of Winning and Effective Number of Parties

# get main analysis data (2010--2019)
turnout <- data

# define majority-minority, majority-majority based on share of ethnic minority population
turnout_majority <- turnout[ethnicity_minority<20]
turnout_minority <- turnout[ethnicity_minority>=20]


# define list of datasets to compute RD estimates
data_list <- list(turnout, turnout_majority, turnout_minority)

# define list of outcomes
outcome_list <- c('turnout', 'share_t1', 'share_non_coethnic_opposition_t1', 'victory_t1', 'eff_num_parties')

# define vectors to store estimates
coef <- c()
se <- c()
p_value <- c()
cil <- c()
cir <- c()
ci <- c()
mean_control <- c()
bw <- c()
nl <- c()
nr <- c()
Nl <- c()
Nr <- c()

# Compute RD estimates for 3 datasets and 5 outcomes
for (j in seq(1,length(outcome_list),1)) {
  
  for (i in seq(1,length(data_list),1)) {
    
    data_r <- data_list[[i]]
    outcome <- outcome_list[j]
    main<- rdrobust(data_r[, get(outcome)],
                    data_r[, minority_victory_margin],
                    p = 1,
                    kernel = "triangular",
                    bwselect = "mserd")
    
    coef <- c(coef, main$coef[1,1])
    se <- c(se, main$se[1,1])
    p_value <- c(p_value, main$pv[3,1])
    cil <- c(cil, main$ci[3,1])
    cir <- c(cir, main$ci[3,2])
    ci <- c(ci, paste0('[', round(main$ci[3,1],3), ',', round(main$ci[3,2],3), ']'))
    mean_control <- c(mean_control, main$beta_p_l[1])
    bw <-  c(bw, main$bws[1])
    nl <- c(nl, main$N_h[1])
    nr <- c(nr, main$N_h[2])
    Nl <- c(Nl, main$N[1])
    Nr <- c(Nr, main$N[2])
    
  }
}

# Compute table
sample <- c('all', 'majority-white', 'plurality-minority')
outcomes <- c('turnout', 'vs incumbent', 'vs opponent', 'prob. victory', 'eff. num. parties')

estimates <- data.frame(coef=coef,se=se,p_value=p_value,ci=ci,mean_control=mean_control,
                        bw=bw,n=nl+nr,N=Nl+Nr,
                        outcome = rep(outcomes, each = 3),
                        sample = rep(sample, each=1, times = 5))

# create footnotes for table
comment <- list(pos = list(0), command = NULL)
comment$pos[[1]] <- c(nrow(estimates))
comment$command <- c(paste("\\hline\n",
                           "{\\footnotesize Notes: The dependent variable \\emph{turnout} is the constituency-level turnout rate at \\emph{t+1}, \\emph{vs incumbent} the vote share for the incumbent's party, \\emph{vs opponent} the vote share for the incumbent's strongest opponent (at \\emph{t}), \\emph{prob. victory} the incumbent's probability of winning, and \\emph{eff. num. parties} the effective number of parties. \\emph{RD estimate} is computed with local-linear regression within a symmetric MSE-optimal bandwidth. \\emph{se} is the conventional standard error, \\emph{p-value} and \\emph{95\\% CI} are robust bias-corrected. \\emph{mean control} indicates the average outcome at \\emph{t+1} in constituencies where ethnic minorities barely lost at \\emph{t}. \\emph{MSE opt bw} is the MSE-optimal bandwidth of minority vote-share winning margin around the victory threshold, \\emph{eff. N} is the sample size within the MSE-optimal bandwidth, \\emph{N} is the sample size. In \\emph{sample const}, \\emph{all} includes all constituencies, \\emph{majority-white} constituencies with an ethnic minority population share smaller than 20\\%, and \\emph{plurality-minority} constituencies with an ethnic minority population share greater than 20\\%. Election results data are from the Electoral Commission. The ethnic background of candidates is constructed by the authors.}\n", sep = ""))

# create column names for table
colnames(estimates) <- c('RD estimate', 'se', 'p-value', '95% CI', 'mean control', 'MSE opt bw', 'eff. N', 'N', 'outcome', 'sample const')

# print table to latex
print(xtable(estimates,
             align = "ccccccccccc",       
             caption = "Ethnic Minority Representation Effects on Vote Shares, Incumbent's Probability of Winning and Effective Number of Parties",
             label = "table:effect_vote_choice",
             digits = c(0,3,3,3,3,3,3,0,0,0,0)),
      include.rownames=FALSE,
      caption.placement = "top",
      add.to.row = comment,
      timestamp = NULL,
      file='./output/tableD1.tex')


## Compute p-value of difference in coefficients test reported in Appendix D
estimates <- data.table(estimates)
diff_in_coeffs <- z_stat(estimates[outcome=='vs opponent' & sample=='majority-white', `RD estimate`],
                          estimates[outcome=='vs opponent' & sample=='plurality-minority', `RD estimate`],
                          estimates[outcome=='vs opponent' & sample=='majority-white', se],
                          estimates[outcome=='vs opponent' & sample=='plurality-minority', se])
print(2*pnorm(q=diff_in_coeffs, lower.tail=FALSE))



### Compute Table D.2: Ethnic Minority Representation Effects on Vote Choice: Evidence from Survey Data
# get BES data
bes <- bes_all[grep('^(E|W|S)', ons_id)]
bes_white <- bes[white==1]
bes_minority <- bes[white==0]

# define list of datasets and list of outcomes to compute RD estimates
data_list <- list(bes, bes_white, bes_minority)
outcome_list <- c('turnout_ge', 'vote_incumbent', 'vote_opposition')

# define vectors to store estimates
coef <- c()
se <- c()
p_value <- c()
cil <- c()
cir <- c()
ci <- c()
mean_control <- c()
bw <- c()
nl <- c()
nr <- c()
Nl <- c()
Nr <- c()
Ml <- c()
Mr <- c()

# Compute RD estimates for three datasets, three outcomes
for (j in seq(1,length(outcome_list),1)) {
  
  for (i in seq(1,length(data_list),1)) {
    
    data_r <- data_list[[i]]
    outcome <- outcome_list[j]
    
    
    main <- rdrobust(data_r[, get(outcome)],
                     data_r[, minority_victory_margin],
                     p = 1,
                     kernel = "triangular",
                     bwselect = "mserd",
                     cluster = data_r[, cluster],
                     nnmatch = 4)
    
    
    coef <- c(coef, main$coef[1,1])
    se <- c(se, main$se[1,1])
    p_value <- c(p_value, main$pv[3,1])
    cil <- c(cil, main$ci[3,1])
    cir <- c(cir, main$ci[3,2])
    ci <- c(ci, paste0('[', round(main$ci[3,1],3), ',', round(main$ci[3,2],3), ']'))
    mean_control <- c(mean_control, main$beta_p_l[1])
    bw <-  c(bw, main$bws[1])
    nl <- c(nl, main$N_h[1])
    nr <- c(nr, main$N_h[2])
    Nl <- c(Nl, main$N[1])
    Nr <- c(Nr, main$N[2])
    Ml <- c(Ml, main$M[1])
    Mr <- c(Mr, main$M[2])
    
  }
}

sample <- c('all', 'white', 'minority')
outcomes <- c('turnout', 'voted incumbent', 'voted opponent')
estimates <- data.frame(coef=coef,se=se,p_value=p_value,ci=ci,mean_control=mean_control,
                        bw=bw,n=nl+nr,N=Nl+Nr, M=Ml+Mr,
                        outcome = rep(outcomes, each = 3),
                        sample = rep(sample, each=1, times = 3))

# create footnotes for table
comment <- list(pos = list(0), command = NULL)
comment$pos[[1]] <- c(nrow(estimates))
comment$command <- c(paste("\\hline\n",
                           "{\\footnotesize Notes: The dependent variable \\emph{turnout} indicates whether a respondent turnout to vote in election \\emph{t+1}, \\emph{voted incumbent} whether they voted for the incumbent's party, and \\emph{voted opponent} for the party of the incumbent's strongest opponent (at \\emph{t}). \\emph{RD estimate} is computed with local-linear regression within a symmetric MSE-optimal bandwidth. \\emph{se} is the conventional standard error, \\emph{p-value} and \\emph{95\\% CI} are robust bias-corrected. \\emph{mean control} indicates the average outcome at \\emph{t+1} in constituencies where ethnic minorities barely lost at \\emph{t}. \\emph{MSE opt bw} is the MSE-optimal bandwidth of minority vote-share winning margin around the victory threshold, \\emph{eff. N} is the sample size within the MSE-optimal bandwidth, \\emph{N} is the sample size, and \\emph{Num. clusters} the number of constituency-elections. In \\emph{sample}, \\emph{all} includes all respondents, \\emph{white} respondents who self-identified as white, and \\emph{BAME} who self-identified as Black, Asian and minority ethnic. Survey data is from the British Election Study 2015, 2017, 2019, election results 2010, 2015, 2017 from the Electoral Commission. The ethnic background of candidates is constructed by the authors.}\n", sep = ""))

# create column names for table
colnames(estimates) <- c('RD estimate', 'se', 'p-value', '95% CI', 'mean control', 'MSE opt bw', 'eff. N', 'N', 'Num. clusters', 'outcome', 'sample')

# print table to latex
print(xtable(estimates,
             align = "cccccccccccc",       
             caption = "Ethnic Minority Representation Effects on Vote Choice: Evidence from Survey Data",
             label = "table:effect_vote_choice_bes",
             digits = c(0,3,3,3,3,3,3,0,0,0,0,0)),
      include.rownames=FALSE,
      caption.placement = "top",
      add.to.row = comment,
      timestamp = NULL,
      file='./output/tableD2.tex')


### Compute Table E.1: Selection of Constituencies into the Sample
# compare statistics between all constituencies and in-sample constituencies

# get 2011 Census demographic data
demographic <- fread(file='./data/clean/census_demographic_data.csv')

# define in-sample constituencies
sample_const <- data[, .(ons_id, election, minority_victory_margin)]

# define vector of variables to compare statistics
baseline <- c('ethnicity_minority',
              'religion_minority',
              'density',
              'Single (never married or never registered a same-sex civil partnership)_%' ,
              'deprivation1', 'deprivation2', 'deprivation3', 'deprivation4',
              'grade_ab', 'gradea_c1' ,'grade_c2', 'grade_de',
              'Highest level of qualification: Level 1 qualifications_%' ,
              'Highest level of qualification: Level 2 qualifications_%' ,
              'Highest level of qualification: Level 3 qualifications_%' ,
              'Highest level of qualification: Level 4 qualifications and above_%' ,
              'Economically active: In employment_%' , 'Economically active: Unemployed_%' ,
              'Living rent free_%' , 'Owned_%' , 'Private rented_%' , 'Social rented_%' ,
              'english_not_main_language',
              'Other countries_%')

# define labels of variables 
baseline_labels <- c('share ethnic minority',
                     'share non-dominant religion',
                     'population density',
                     'share single' ,
                     'share deprivation level 1', 'share deprivation level 2', 'share deprivation level 3', 'share deprivation level 4',
                     'share social grade ab', 'share social gradea c1' ,'share social grade c2', 'share social grade de',
                     'share level 1 qualifications' ,
                     'share level 2 qualifications' ,
                     'share level 3 qualifications' ,
                     'share level 4+ qualifications' ,
                     'share economically active: employed',
                     'share economically active: unemployed',
                     'share tenure: rent free' , 'share tenure: owned' , 'share tenure: private rented' , 'share tenure: social rented',
                     'share English not main language',
                     'share immigrants: non-EU')

# compute table of summary statistics
election <- rep(c(2010,2015,2017), each=nrow(demographic))

data_stats <- unique(merge(cbind(do.call("rbind", replicate(3, demographic, simplify = FALSE)),election), sample_const,
                           by.x = c('geogcode','election'), by.y =c('ons_id','election'), all.x = TRUE), by=c('election', 'geogcode'))

# define in-sample constituency if running variable is defined
data_stats[,in_sample:=ifelse(!is.na(minority_victory_margin),1,0)]

# compute mean and sd for all constituencies and in-sample constituencies
summary_stats <- cbind(baseline_labels, data_stats[, .(lapply(.SD, mean, na.rm=TRUE), lapply(.SD, sd, na.rm=TRUE)), ,
                                                   .SDcols=baseline],
                       data_stats[in_sample==1, .(lapply(.SD, mean, na.rm=TRUE), lapply(.SD, sd, na.rm=TRUE)), ,
                                  .SDcols=baseline])
# add column names to table
names(summary_stats) <- c('variable', 'mean', 'sd','mean', 'sd')
# add number of observations to table
summary_stats <- rbind(summary_stats,
                       data.frame(variable ='N constituency-election',
                                  mean=data_stats[,.N], sd=data_stats[,.N], mean=data_stats[in_sample==1,.N],
                                  sd=data_stats[in_sample==1,.N]), use.names=FALSE)


# create footnotes for table
comment <- list(pos = list(0), command = NULL)
comment$pos[[1]] <- c(nrow(summary_stats))
comment$command <- c(paste("\\hline\n",
                           "{\\footnotesize Notes: Notes: Shows descriptive statistics for all constituencies, and constituencies in our sample. Our sample includes constituencies where ethnic minority candidates contend a seat for Parliament against a white candidate. The unit of observation is a constituency-election year.}\n", sep = ""))

# print table to latex
print(xtable(summary_stats,
             align='llcccc',       
             caption = "Selection of Constituencies into the Sample",
             label = "table:comparison_nominal_sample",
             digits=c(0,0,3,3,3,3)),
      include.rownames=FALSE,
      caption.placement = "top",
      add.to.row = comment,
      timestamp = NULL,
      file='./output/tableE1.tex')



### Compute Figure A.1.a: Number of ethnic minority candidates contesting a seat in Parliament

# get data of all ethnic minority candidates by election year (independently of whether they contest against a white or an ethnic minority candidate, note therefore that `victory_margin` is the difference between the vote share of the ethnic minority candidate and the winner or the second winner if minority candidate wins election)
candidates <- fread('./data/clean/data_all_minority_candidates.csv')

# compute indicator of whether minority candidate wins election
candidates[, win:=ifelse(victory_margin>0,1,0)]

# compute and save plot
ggplot(candidates[, .(N =.N), by=.(win, election)]) + 
  aes(x=as.factor(election), y=N, fill=as.factor(win)) +
  geom_col(position='dodge') +
  xlab(NULL) + ylab('ethnic minority candidates') +
  scale_fill_grey(start=0.3, end=0.8, name = '', labels = c("lost", "won")) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1, size=12),
        axis.text.y = element_text(size=12), axis.title.y = element_text(size=14),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_blank(), axis.line = element_line(colour = "black"))
ggsave('./output/figureA1a.pdf', width=6, height=4.85)


### Compute Figure A.1.b: Number of ethnic minority candidates by political party
# compute and save plot
ggplot(candidates[, .(N =.N), by=.(party_name)]) + 
  aes(x=reorder(party_name,-N), y=N) +
  geom_col(position='dodge') +
  xlab(NULL) + ylab('ethnic minority candidates') +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1, size=12),
        axis.text.y = element_text(size=12), axis.title.y = element_text(size=14),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_blank(), axis.line = element_line(colour = "black"))
ggsave('./output/figureA1b.pdf', width=6, height=4.85)



### Compute Figure A.2.a: Total number of ethnic minority candidates by parliamentary constituency
# get shapefile of constituencies
wm_boundaries <- st_read('./data/constituency shapefiles/Westminster_Parliamentary_Constituencies_(Dec_2017)_UK_BFE.shp')

# compute number of minority candidates by constituency in England, Scotland and Wales
wm_boundaries <- merge(wm_boundaries,candidates[, .(N =.N), by=.(ons_id)],by.x='PCON17CD',by.y='ons_id', all=TRUE)
wm_boundaries[is.na(wm_boundaries$N), 'N'] <- 0
wm_boundaries$country <- substr(wm_boundaries$PCON17CD,1,1)
wm_boundaries <- wm_boundaries[(wm_boundaries$country=='E' | wm_boundaries$country=='W' | wm_boundaries$country=='S'), ]

# plot map
ggplot() +
  geom_sf(data = wm_boundaries, aes(fill = N),lwd = 0) + 
  scale_fill_viridis_c(option = "magma", begin = 0.1) +
  labs(fill = paste("# minority", "candidates", sep='\n'))
ggsave('./output/figureA2a.pdf', width=6, height=4.85)


### Compute Figure A.2.b: Total number of ethnic minority candidates by sub-region of origin

# assign sub-region of origin based on granparents', parents', candidates' country of origin
candidate_characteristics <- melt(candidates[, .(ons_id, election, candidate_name, birthcountry_father,
                                                 birthcountry_mother, birthcountry_grandparent1, birthcountry_grandparent2, birthcountry)],
                                  id=c('ons_id', 'election', 'candidate_name'), variable.name='original_variable')
candidate_characteristics <- rbindlist(list(
  candidate_characteristics,
  melt(candidates[, .(ons_id, election, candidate_name, determined_ethnicity, detrace)],
       id=c('ons_id', 'election', 'candidate_name'), variable.name='original_variable')
))
candidate_characteristics <- candidate_characteristics[value!='']

# map countries and nationalities into regions
# get data with mappings
country_continent_region <- fread("./data/nationality mapping/country_continent_region.txt")
country_continent_region[,name:=tolower(name)]
country_continent_region[!is.na(`intermediate-region`), sub_region:=tolower(`intermediate-region`)]
country_continent_region[sub_region=='', sub_region:=tolower(`sub-region`)]
country_region <- country_continent_region[,.(name, sub_region)]
region_region <- unique(country_continent_region[, .(sub_region, sub_region)])
country_region <- rbindlist(list(country_region, region_region), use.names=F)

country_nationalities <- fread("./data/nationality mapping/country_nationalities.csv", header=T)
country_nationalities <- country_nationalities[, .(name=tolower(country), nationality=tolower(nationality))]
country_nationalities <- merge(country_nationalities, country_region, by.x='name', by.y='name')[, .(name=nationality, sub_region)]
country_region <- rbindlist(list(country_region, country_nationalities), use.names=F)

# manually defined mapping for countries/nationalities missing from mapping
other_region <- data.table(
  name=c('bengali', 'bengal', 'viet nam', 'vietnam', 'burma', 'iran', 
         'iranian', 'syria', 'trinidad', 'kurdish'
  ), 
  region=c('southern asia', 'southern asia', 'south-eastern asia', 'south-eastern asia', 'south-eastern asia', 'southern asia', 
           'southern asia', 'western asia', 'caribbean', 'western asia'
  )
)
country_region <- rbindlist(list(country_region, other_region), use.names=F)

# merge candidates data with mapping
candidate_characteristics <- merge(
  candidate_characteristics[!(value %in% c('african', 'asian', 'unknown', 'black', 'other', 'bme', 'caribbean'))], 
  country_region, by.x='value', by.y='name', all.x=T
)

# manually clean cases that are mapped into more than one sub-region
candidate_characteristics <- unique(candidate_characteristics[!is.na(sub_region)],
                                    by=c('ons_id', 'election', 'candidate_name', 'sub_region'))
candidate_characteristics[candidate_name=='Ash Rehal', sub_region:='southern asia']
candidate_characteristics[candidate_name=='Bhavna Joshi', sub_region:='southern asia']
candidate_characteristics[candidate_name=="Brendan D'Cruz", sub_region:='southern asia']
candidate_characteristics[candidate_name=='Faiza Shaheen', sub_region:='southern asia']
candidate_characteristics[candidate_name=='Janet Daby', sub_region:='caribbean']
candidate_characteristics[candidate_name=='Priti Patel', sub_region:='southern asia']
candidate_characteristics[candidate_name=='Rana Das-Gupta', sub_region:='south-eastern asia']
candidate_characteristics[candidate_name=='Shailesh Vara', sub_region:='southern asia']
candidate_characteristics[(candidate_name=='Suella Braverman' | candidate_name=='Suella Fernandes'), sub_region:='southern asia']
candidate_characteristics[candidate_name=='Valerie Vaz', sub_region:='southern asia']
candidate_characteristics <- candidate_characteristics[order(candidate_name),.(sub_region=unique(sub_region)), by=.(election,ons_id,candidate_name)]

# compute and save plot
ggplot(candidate_characteristics[, .(N =.N), by=.(sub_region)]) + 
  aes(x=reorder(sub_region,-N), y=N) +
  geom_col(position='dodge') +
  xlab(NULL) + ylab('ethnic minority candidates') +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1, size=12),
        axis.text.y = element_text(size=12), axis.title.y = element_text(size=14),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_blank(), axis.line = element_line(colour = "black"))
ggsave('./output/figureA2b.pdf', width=6, height=4.85)



### Compute Figure B.1.a: Continuity of predetermined variables around the victory threshold (Continuity of means using local linear regression) 
# rename data
turnout_s <- data

# define whether candidate's party is left-leaning
turnout_s[, party_left:=ifelse(party_name!='Conservative',1,0)]

# define vector of predetermined characteristics
baseline <- c('incumbent',
              'candidate_female',
              'party_left',
              'ethnicity_minority',
              'density',
              'Single (never married or never registered a same-sex civil partnership)_%',
              'deprivation1',
              'deprivation2',
              'deprivation3',
              'deprivation4',
              'grade_ab',
              'gradea_c1',
              'grade_c2',
              'grade_de',
              'Economically Inactive_%',
              'Economically active: In employment_%',
              'Economically active: Unemployed_%',
              'Living rent free_%',
              'Owned_%',
              'Social rented_%',
              'Private rented_%',
              'english_not_main_language',
              'Other countries_%')

# compute RD estimates on predetermined characteristics and store p-values of estimates
baseline_pv <- c()

data_r <- turnout_s
for (i in baseline) {
  rdout <- rdrobust(data_r[,get(i)],
                    data_r[,minority_victory_margin],
                    kernel = "triangular",
                    p = 1,
                    bwselect = "mserd")
  
  baseline_pv <- c(baseline_pv, rdout$pv[3])
  
}

# define labels of predetermined characteristics
baseline_labels <- c('incumbent candidate',
                     'female candidate',
                     'left-leaning candidate',
                     '% ethnic minority',
                     'population density',
                     '% single' ,
                     '% deprivation level 1', '% deprivation level 2', '% deprivation level 3', '% deprivation level 4',
                     '% social grade ab', '% social gradea c1' ,'% social grade c2', '% social grade de',
                     '% economically inactive',
                     '% economically active: employed',
                     '% economically active: unemployed',
                     '% tenure: rent free' , '% tenure: owned' , '% tenure: social rented', '% tenure: private rented', 
                     '% English not main language',
                     '% immigrants: non-EU')

# compute data for plot
data_graph <- data.frame('baseline'=baseline_labels, 'p_values'=baseline_pv)
data_graph <- data.table(data_graph)
data_graph <- data_graph[order(p_values)]

# compute plot
ggplot(data_graph) +
  aes(x=p_values, y=reorder(baseline,-p_values)) +
  geom_point(colour='#0000FF', size=1.5) +
  geom_vline(aes(xintercept=0.05),
             linetype='longdash', colour='#666666') +
  ggtitle('Continuity of means predetermined variable') +
  theme(plot.title = element_text(size=14, lineheight=3),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_blank(), axis.line = element_line(colour = "black"),
        axis.text.x = element_text(size=10), axis.text.y = element_text(size=9),
        axis.title.x = element_text(size =12)) +
  xlab('p-value of test for continuity of means around cutoff') +
  ylab(NULL)
ggsave('./output/figureB1a.pdf', width=6.3, height=4.85)


### Compute Figure B.1.b: Continuity of predetermined variables around the victory threshold (Continuity of distribution using asymptotic permutation test) 

# compute estimates of test of continuity of distribution of predetermined characteristics
set.seed(234)
permtest <- RDperm(W = baseline,
                   z = "minority_victory_margin",
                   data = turnout_s)

# print permutation test results
summary(permtest)
# store p-values of permutation test
baseline_pv <- unname(permtest$results[,2])
# compute data for plot
data_graph <- data.frame('baseline'=baseline_labels, 'p_values'=baseline_pv[1:length(baseline_pv)-1])
data_graph <- data.table(data_graph)
data_graph <- data_graph[order(p_values)]

# compute plot
ggplot(data_graph) +
  aes(x=p_values, y=reorder(baseline,-p_values)) +
  geom_point(colour='#0000FF', size=2) +
  geom_vline(aes(xintercept=0.05),
             linetype='longdash', colour='#666666') +
  ggtitle('Continuity of distribution predetermined variable') +
  theme(plot.title = element_text(size=14, lineheight=3),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_blank(), axis.line = element_line(colour = "black"),
        axis.text.x = element_text(size=10), axis.text.y = element_text(size=9),
        axis.title.x = element_text(size =12)) +
  xlab('p-value of test for continuity of distribution around cutoff') +
  ylab(NULL)
ggsave('./output/figureB1b.pdf', width=6.3, height=4.85)



### Compute Figure B.2. a, b, c: Continuity in the density of candidates around the cutoff (with linear, quadratic and cubic approximations, respectively)

# compute continuity of density test with linear approximation
outden <- rddensity(data[, minority_victory_margin], p=1)

# compute and save plot
pden <- rdplotdensity(outden, data[, minority_victory_margin],
                      hist = FALSE,
                      lcol = '#0000FF', CIcol = 1, xlabel = "Margin of victory",
                      ylabel = 'Density of candidates', title = 'Local linear approximation')
pden$Estplot + 
  geom_vline(aes(xintercept=0),
             linetype='solid', colour='#666666') +
  ggplot2::annotate(geom="text", x=-20, y=0.0005, label=paste(paste("Density continuity test", "p-value = ", sep='\n'),round(outden$test$p_jk, 2), sep='\n')) +
  scale_x_continuous(breaks=seq(-50,50,10)) +
  theme(plot.title = element_text(size=14, lineheight=3),
        panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.border = element_blank(),
        panel.background = element_blank(), axis.line = element_line(colour = "black"),
        axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1, size = 12),
        axis.title.x = element_text(size =12), axis.title.y = element_text(size =12),
        axis.text.y = element_text(size = 12))
ggsave('./output/figureB2a.pdf', width=6, height=4.85)


# compute continuity of density test with quadratic approximation
outden <- rddensity(data[, minority_victory_margin], p=2)

# compute and save plot
pden <- rdplotdensity(outden, data[, minority_victory_margin],
                      hist = FALSE,
                      lcol = '#0000FF', CIcol = 1, xlabel = "Margin of victory",
                      ylabel = 'Density of candidates', title = 'Local quadratic approximation')
pden$Estplot + 
  geom_vline(aes(xintercept=0),
             linetype='solid', colour='#666666') +
  ggplot2::annotate(geom="text", x=-20, y=0.0005, label=paste(paste("Density continuity test", "p-value = ", sep='\n'),round(outden$test$p_jk, 2), sep='\n')) +
  scale_x_continuous(breaks=seq(-50,50,10)) +
  theme(plot.title = element_text(size=14, lineheight=3),
        panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.border = element_blank(),
        panel.background = element_blank(), axis.line = element_line(colour = "black"),
        axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1, size = 12),
        axis.title.x = element_text(size =12), axis.title.y = element_text(size =12),
        axis.text.y = element_text(size = 12))
ggsave('./output/figureB2b.pdf', width=6, height=4.85)


# compute continuity of density test with cubic approximation
outden <- rddensity(data[, minority_victory_margin], p=3)

# compute and save plot
pden <- rdplotdensity(outden, data[, minority_victory_margin],
                      hist = FALSE,
                      lcol = '#0000FF', CIcol = 1, xlabel = "Margin of victory",
                      ylabel = 'Density of candidates', title = 'Local cubic approximation')
pden$Estplot + 
  geom_vline(aes(xintercept=0),
             linetype='solid', colour='#666666') +
  ggplot2::annotate(geom="text", x=-50, y=0.000, label=paste(paste("Density continuity test", "p-value = ", sep='\n'),round(outden$test$p_jk, 2), sep='\n')) +
  scale_x_continuous(breaks=seq(-80,80,20)) +
  theme(plot.title = element_text(size=14, lineheight=3),
        panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.border = element_blank(),
        panel.background = element_blank(), axis.line = element_line(colour = "black"),
        axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1, size = 12),
        axis.title.x = element_text(size =12), axis.title.y = element_text(size =12),
        axis.text.y = element_text(size = 12))
ggsave('./output/figureB2c.pdf', width=6, height=4.85)




### Compute Figure B.3 a, b, c: Sensitivity to the choice of bandwidth (among all constituencies, majority-white, plurality-minority, respectively)
# rename data
turnout <- data
# define vector of covars (main analysis Table 2)
covars <- c('ethnicity_minority', 'gradea_c1', 'deprivation1', 'density', 'candidates_Labour', 'incumbent')

# compute data for majority-white, plurality-minority constituencies
turnout_majority <- turnout[ethnicity_minority<20]
turnout_minority <- turnout[ethnicity_minority>=20]

# compute RD estimates with MSE-optimal bandwidth, half, three fourths, five fourths and one and a half times the MSE-optimal size, and store estimates for plot
main_mse <- rdrobust(turnout[, turnout],
                     turnout[, minority_victory_margin],
                     covs = turnout[, covars, with = FALSE],
                     p = 1,
                     kernel = "triangular",
                     bwselect = "mserd")
data_main_mse <- cbind('MSE' , main_mse$bws[1], main_mse$coef[1], main_mse$ci[3,1], main_mse$ci[3,2])

main_mse.5 <- rdrobust(turnout[, turnout],
                       turnout[, minority_victory_margin],
                       covs = turnout[, covars, with = FALSE],
                       p = 1,
                       kernel = "triangular",
                       h = 0.5*main_mse$bws[1])
data_main_mse.5 <- cbind('0.5*MSE' , main_mse.5$bws[1], main_mse.5$coef[1], main_mse.5$ci[3,1], main_mse.5$ci[3,2])

main_mse.75 <- rdrobust(turnout[, turnout],
                        turnout[, minority_victory_margin],
                        covs = turnout[, covars, with = FALSE],
                        p = 1,
                        kernel = "triangular",
                        h = 0.75*main_mse$bws[1])
data_main_mse.75 <- cbind('0.75*MSE' , main_mse.75$bws[1], main_mse.75$coef[1], main_mse.75$ci[3,1], main_mse.75$ci[3,2])

main_mse1.25 <- rdrobust(turnout[, turnout],
                         turnout[, minority_victory_margin],
                         covs = turnout[, covars, with = FALSE],
                         p = 1,
                         kernel = "triangular",
                         h = 1.25*main_mse$bws[1])
data_main_mse1.25 <- cbind('1.25*MSE' , main_mse1.25$bws[1], main_mse1.25$coef[1], main_mse1.25$ci[3,1], main_mse1.25$ci[3,2])

main_mse1.5 <- rdrobust(turnout[, turnout],
                        turnout[, minority_victory_margin],
                        covs = turnout[, covars, with = FALSE],
                        p = 1,
                        kernel = "triangular",
                        h = 1.5*main_mse$bws[1])
data_main_mse1.5 <- cbind('1.5*MSE' , main_mse1.5$bws[1], main_mse1.5$coef[1], main_mse1.5$ci[3,1], main_mse1.5$ci[3,2])

## majority-white
main_maj_mse <- rdrobust(turnout_majority[, turnout],
                         turnout_majority[, minority_victory_margin],
                         covs = turnout_majority[, covars, with = FALSE],
                         p = 1,
                         kernel = "triangular",
                         bwselect = "mserd")
data_main_maj_mse <- cbind('MSE' , main_maj_mse$bws[1], main_maj_mse$coef[1], main_maj_mse$ci[3,1], main_maj_mse$ci[3,2])

main_maj_mse.5 <- rdrobust(turnout_majority[, turnout],
                           turnout_majority[, minority_victory_margin],
                           covs = turnout_majority[, covars, with = FALSE],
                           p = 1,
                           kernel = "triangular",
                           h = 0.5*main_maj_mse$bws[1])
data_main_maj_mse.5 <- cbind('0.5*MSE' , main_maj_mse.5$bws[1], main_maj_mse.5$coef[1], main_maj_mse.5$ci[3,1], main_maj_mse.5$ci[3,2])

main_maj_mse.75 <- rdrobust(turnout_majority[, turnout],
                            turnout_majority[, minority_victory_margin],
                            covs = turnout_majority[, covars, with = FALSE],
                            p = 1,
                            kernel = "triangular",
                            h = 0.75*main_maj_mse$bws[1])
data_main_maj_mse.75 <- cbind('0.75*MSE' , main_maj_mse.75$bws[1], main_maj_mse.75$coef[1], main_maj_mse.75$ci[3,1], main_maj_mse.75$ci[3,2])

main_maj_mse1.25 <- rdrobust(turnout_majority[, turnout],
                             turnout_majority[, minority_victory_margin],
                             covs = turnout_majority[, covars, with = FALSE],
                             p = 1,
                             kernel = "triangular",
                             h = 1.25*main_maj_mse$bws[1])
data_main_maj_mse1.25 <- cbind('1.25*MSE' , main_maj_mse1.25$bws[1], main_maj_mse1.25$coef[1], main_maj_mse1.25$ci[3,1], main_maj_mse1.25$ci[3,2])

main_maj_mse1.5 <- rdrobust(turnout_majority[, turnout],
                            turnout_majority[, minority_victory_margin],
                            covs = turnout_majority[, covars, with = FALSE],
                            p = 1,
                            kernel = "triangular",
                            h = 1.5*main_maj_mse$bws[1])
data_main_maj_mse1.5 <- cbind('1.5*MSE' , main_maj_mse1.5$bws[1], main_maj_mse1.5$coef[1], main_maj_mse1.5$ci[3,1], main_maj_mse1.5$ci[3,2])

## majority-minority
main_min_mse <- rdrobust(turnout_minority[, turnout],
                         turnout_minority[, minority_victory_margin],
                         covs = turnout_minority[, covars, with = FALSE],
                         p = 1,
                         kernel = "triangular",
                         bwselect = "mserd")
data_main_min_mse <- cbind('MSE' , main_min_mse$bws[1], main_min_mse$coef[1], main_min_mse$ci[3,1], main_min_mse$ci[3,2])

main_min_mse.5 <- rdrobust(turnout_minority[, turnout],
                           turnout_minority[, minority_victory_margin],
                           covs = turnout_minority[, covars, with = FALSE],
                           p = 1,
                           kernel = "triangular",
                           h = 0.5*main_min_mse$bws[1])
data_main_min_mse.5 <- cbind('0.5*MSE' , main_min_mse.5$bws[1], main_min_mse.5$coef[1], main_min_mse.5$ci[3,1], main_min_mse.5$ci[3,2])

main_min_mse.75 <- rdrobust(turnout_minority[, turnout],
                            turnout_minority[, minority_victory_margin],
                            covs = turnout_minority[, covars, with = FALSE],
                            p = 1,
                            kernel = "triangular",
                            h = 0.75*main_min_mse$bws[1])
data_main_min_mse.75 <- cbind('0.75*MSE' , main_min_mse.75$bws[1], main_min_mse.75$coef[1], main_min_mse.75$ci[3,1], main_min_mse.75$ci[3,2])

main_min_mse1.25 <- rdrobust(turnout_minority[, turnout],
                             turnout_minority[, minority_victory_margin],
                             covs = turnout_minority[, covars, with = FALSE],
                             p = 1,
                             kernel = "triangular",
                             h = 1.25*main_min_mse$bws[1])
data_main_min_mse1.25 <- cbind('1.25*MSE' , main_min_mse1.25$bws[1], main_min_mse1.25$coef[1], main_min_mse1.25$ci[3,1], main_min_mse1.25$ci[3,2])

main_min_mse1.5 <- rdrobust(turnout_minority[, turnout],
                            turnout_minority[, minority_victory_margin],
                            covs = turnout_minority[, covars, with = FALSE],
                            p = 1,
                            kernel = "triangular",
                            h = 1.5*main_min_mse$bws[1])
data_main_min_mse1.5 <- cbind('1.5*MSE' , main_min_mse1.5$bws[1], main_min_mse1.5$coef[1], main_min_mse1.5$ci[3,1], main_min_mse1.5$ci[3,2])

# compute plots of sensitivity to choice of bandwidth
# all constituencies
data_main <- as.data.frame(rbind(data_main_mse.5, data_main_mse.75, data_main_mse, data_main_mse1.25, data_main_mse1.5))
colnames(data_main) <- c('type', 'bandwidth', 'coef', 'ci_l', 'ci_u')

ggplot(data_main) +
  aes(x=as.numeric(as.character(bandwidth)), y=as.numeric(as.character(coef))) +
  geom_point() +
  geom_errorbar(aes(ymin=as.numeric(as.character(ci_l)), ymax=as.numeric(as.character(ci_u))), alpha=0.4) +
  scale_x_continuous(breaks=c(main_mse.5$bws[1], main_mse.75$bws[1], main_mse$bws[1], main_mse1.25$bws[1], main_mse1.5$bws[1]),
                     labels = c(paste('0.5*MSE',round(main_mse.5$bws[1],1),sep='\n'),
                                paste('0.75*MSE',round(main_mse.75$bws[1],1),sep='\n'),
                                paste('MSE',round(main_mse$bws[1],1),sep='\n'),
                                paste('1.25*MSE',round(main_mse1.25$bws[1],1),sep='\n'),
                                paste('1.5*MSE',round(main_mse1.5$bws[1],1),sep='\n'))) +
  geom_hline(aes(yintercept=0),
             linetype='dashed', colour='#666666') +
  ggtitle("All constituencies") +
  theme(plot.title = element_text(size=14, lineheight=3),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_blank(), axis.line = element_line(colour = "black"),
        axis.title.x = element_text(size =14), axis.title.y = element_text(size =14),
        axis.text.x = element_text(size=10, angle = 90), axis.text.y = element_text(size = 10)) +
  xlab(NULL) +
  ylab('RD effect on turnout rate')
ggsave('./output/figureB3a.pdf', width=6, height=4.85)

# majority-white constituencies
data_main_maj <- as.data.frame(rbind(data_main_maj_mse.5, data_main_maj_mse.75, data_main_maj_mse, data_main_maj_mse1.25, data_main_maj_mse1.5))
colnames(data_main_maj) <- c('type', 'bandwidth', 'coef', 'ci_l', 'ci_u')

ggplot(data_main_maj) +
  aes(x=as.numeric(as.character(bandwidth)), y=as.numeric(as.character(coef))) +
  geom_point() +
  geom_errorbar(aes(ymin=as.numeric(as.character(ci_l)), ymax=as.numeric(as.character(ci_u))), alpha=0.4) +
  scale_x_continuous(breaks=c(main_maj_mse.5$bws[1], main_maj_mse.75$bws[1], main_maj_mse$bws[1], main_maj_mse1.25$bws[1], main_maj_mse1.5$bws[1]),
                     labels = c(paste('0.5*MSE',round(main_maj_mse.5$bws[1],1),sep='\n'),
                                paste('0.75*MSE',round(main_maj_mse.75$bws[1],1),sep='\n'),
                                paste('MSE',round(main_maj_mse$bws[1],1),sep='\n'),
                                paste('1.25*MSE',round(main_maj_mse1.25$bws[1],1),sep='\n'),
                                paste('1.5*MSE',round(main_maj_mse1.5$bws[1],1),sep='\n'))) +
  geom_hline(aes(yintercept=0),
             linetype='dashed', colour='#666666') +
  ggtitle("Plurality-white constituencies") +
  theme(plot.title = element_text(size=14, lineheight=3),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_blank(), axis.line = element_line(colour = "black"),
        axis.title.x = element_text(size =14), axis.title.y = element_text(size =14),
        axis.text.x = element_text(size=10, angle = 90), axis.text.y = element_text(size = 10)) +
  xlab(NULL) +
  ylab('RD effect on turnout rate')
ggsave('./output/figureB3b.pdf', width=6, height=4.85)

# plurality-minority constituencies
data_main_min <- as.data.frame(rbind(data_main_min_mse.5, data_main_min_mse.75, data_main_min_mse, data_main_min_mse1.25, data_main_min_mse1.5))
colnames(data_main_min) <- c('type', 'bandwidth', 'coef', 'ci_l', 'ci_u')

ggplot(data_main_min) +
  aes(x=as.numeric(as.character(bandwidth)), y=as.numeric(as.character(coef))) +
  geom_point() +
  geom_errorbar(aes(ymin=as.numeric(as.character(ci_l)), ymax=as.numeric(as.character(ci_u))), alpha=0.4) +
  scale_x_continuous(breaks=c(main_min_mse.5$bws[1], main_min_mse.75$bws[1], main_min_mse$bws[1], main_min_mse1.25$bws[1], main_min_mse1.5$bws[1]),
                     labels = c(paste('0.5*MSE',round(main_min_mse.5$bws[1],1),sep='\n'),
                                paste('0.75*MSE',round(main_min_mse.75$bws[1],1),sep='\n'),
                                paste('MSE',round(main_min_mse$bws[1],1),sep='\n'),
                                paste('1.25*MSE',round(main_min_mse1.25$bws[1],1),sep='\n'),
                                paste('1.5*MSE',round(main_min_mse1.5$bws[1],1),sep='\n'))) +
  geom_hline(aes(yintercept=0),
             linetype='dashed', colour='#666666') +
  ggtitle("Plurality-minority constituencies") +
  theme(plot.title = element_text(size=14, lineheight=3),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_blank(), axis.line = element_line(colour = "black"),
        axis.title.x = element_text(size =14), axis.title.y = element_text(size =14),
        axis.text.x = element_text(size=10, angle = 90), axis.text.y = element_text(size = 10)) +
  xlab(NULL) +
  ylab('RD effect on turnout rate')
ggsave('./output/figureB3c.pdf', width=6, height=4.85)



### Compute Figure B.4.a: Sensitivity to polynomial choice
# compute RD estimates with polynomial order 2 in all constituencies, majority-white, plurality-minority, respectively
main_control <- rdrobust(turnout[, turnout],
                         turnout[, minority_victory_margin],
                         covs = turnout[, covars, with = FALSE],
                         p = 2,
                         kernel = "triangular",
                         bwselect = "mserd")

main_maj_control <- rdrobust(turnout_majority[, turnout],
                             turnout_majority[, minority_victory_margin],
                             covs = turnout_majority[, covars, with = FALSE],
                             p = 2,
                             kernel = "triangular",
                             bwselect = "mserd")

main_min_control <- rdrobust(turnout_minority[, turnout],
                             turnout_minority[, minority_victory_margin],
                             covs = turnout_minority[, covars, with = FALSE],
                             p = 2,
                             kernel = "triangular",
                             bwselect = "mserd")

# store estimates for plot
coef <- c(main_control$coef[1,1], main_maj_control$coef[1,1], main_min_control$coef[1,1])
cil <- c(main_control$ci[3,1], main_maj_control$ci[3,1], main_min_control$ci[3,1])
cir <- c(main_control$ci[3,2], main_maj_control$ci[3,2], main_min_control$ci[3,2])

data_plot <- data.frame(coef = coef, cil=cil, cir=cir, sample=factor(c('all', 'majority-white', 'plurality-minority'),
                                                                     levels=c('all', 'majority-white', 'plurality-minority')))

# plot estimates
ggplot(data_plot) +
  aes(x=sample, y=coef) +
  geom_point(size=1) +
  geom_errorbar(aes(ymin=cil, ymax=cir), size=0.5, alpha=0.4) +
  geom_hline(aes(yintercept=0),
             linetype='dashed', colour='#666666', size=0.3) +
  theme(plot.title = element_text(size=14, lineheight=3),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_blank(), axis.line = element_line(colour = "black"),
        axis.title.x = element_text(size =14), axis.title.y = element_text(size =14),
        axis.text.x = element_text(size=10), axis.text.y = element_text(size = 10)) +
  xlab('') +
  ylab('RD effect on turnout rate')
ggsave('./output/figureB4a.pdf', width=6, height=4.85)



### Compute Figure B.4.b: Placebo cutoffs
# compute RD estimates for placebo cutoffs

# define vectors to store estimates
cutoff <- c()
rd_coef_tr <- c()
rd_pv_tr <- c()
rd_ci_l_tr <-c()
rd_ci_u_tr <-c()
thresholds <- seq(10,40,15)

# compute estimates
for (j in seq(1, length(thresholds))) {
  i <- thresholds[j]
  rdout <- rdrobust(turnout[minority_victory_margin>0, turnout],
                    turnout[minority_victory_margin>0, minority_victory_margin],
                    covs = turnout[minority_victory_margin>0,
                                c(covars), with=F],                          
                    kernel = "triangular", p = 1, bwselect = "mserd",
                    c=i)
  
  rd_coef_tr <- c(rd_coef_tr, rdout$coef[1])
  rd_pv_tr <- c(rd_pv_tr, rdout$pv[3])
  rd_ci_l_tr <-c(rd_ci_l_tr, rdout$ci[3,1])
  rd_ci_u_tr <-c(rd_ci_u_tr, rdout$ci[3,2])
  cutoff <- c(cutoff, i)
}

# store estimates in data for plot
data_tr <- cbind(cutoff, rd_coef_tr, rd_ci_l_tr, rd_ci_u_tr)
colnames(data_tr) <- c('cutoff', 'coef', 'ci_l', 'ci_u')

# define vectors to store estimates
cutoff <- c()
rd_coef_co <- c()
rd_pv_co <- c()
rd_ci_l_co <-c()
rd_ci_u_co <-c()
thresholds <- -seq(10,40,15)

# compute estimates
for (j in seq(1, length(thresholds))) {
  i <- thresholds[j]
  rdout <- rdrobust(turnout[minority_victory_margin<0, turnout],
                    turnout[minority_victory_margin<0, minority_victory_margin],
                    covs = turnout[minority_victory_margin<0,
                                c(covars),with=F],
                    kernel = "triangular", p = 1, bwselect = "mserd",
                    c=i)
  
  rd_coef_co <- c(rd_coef_co, rdout$coef[1])
  rd_pv_co <- c(rd_pv_co, rdout$pv[3])
  rd_ci_l_co <-c(rd_ci_l_co, rdout$ci[3,1])
  rd_ci_u_co <-c(rd_ci_u_co, rdout$ci[3,2])
  cutoff <- c(cutoff, i)
}

# store estimates in data for plot
data_co <- cbind(cutoff, rd_coef_co, rd_ci_l_co, rd_ci_u_co)
colnames(data_co) <- c('cutoff', 'coef', 'ci_l', 'ci_u')

# compute RD estimate with true cutoff
rdtrue <- rdrobust(turnout[, turnout],
                   turnout[, minority_victory_margin],
                   covs = turnout[, c(covars), with=F],
                   kernel = "triangular", p = 1, bwselect = "mserd",
                   c=0)
data_true <- cbind(rdtrue$c, rdtrue$coef[1], rdtrue$ci[3,1], rdtrue$ci[3,2])
colnames(data_true) <- c('cutoff', 'coef', 'ci_l', 'ci_u')

# plot placebo cutoffs estimates
placebo_cutoffs <- rbind(data_co, data_true, data_tr)
placebo_cutoffs <- as.data.frame(placebo_cutoffs)

ggplot(placebo_cutoffs) +
  aes(x=cutoff, y=coef) +
  geom_point(size=1) +
  geom_errorbar(aes(ymin=ci_l, ymax=ci_u), size=0.5, alpha=0.4) +
  scale_x_continuous(breaks=seq(-45,45,15)) +
  geom_hline(aes(yintercept=0),
             linetype='dashed', colour='#666666', size=0.3) +
  #ggtitle("Placebo Cutoffs") +
  theme(plot.title = element_text(size=14, lineheight=3),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_blank(), axis.line = element_line(colour = "black"),
        axis.title.x = element_text(size =14), axis.title.y = element_text(size =14),
        axis.text.x = element_text(size=10), axis.text.y = element_text(size = 10)) +
  xlab('Cutoff (x=0 true cutoff)') +
  ylab('RD effect on turnout rate')
ggsave('./output/figureB4b.pdf', width=6, height=4.85)



### Compute Figure B.5 a, b, c: Description of 2015, 2017, 2019 Great Britain general elections (vote share of winning candidate, effective number of candidates, margin of victory, respectively)

# get 2015, 2017, 2019 general election results data for all constituencies in England, Scotland and Wales
elections <- fread('./data/clean/ge_2015_2017_2019_ESW.csv')

# compute plots
ggplot(elections, aes(x=vote_share_winner)) + 
  geom_histogram(color="black", fill="white", bins = 30) +
  scale_x_continuous(breaks = seq(0, 1,.1)) +
  xlab('vote share winning candidate') +
  theme(axis.text.x = element_text(size=12), axis.title.x = element_text(size=14),
        axis.text.y = element_text(size=12), axis.title.y = element_text(size=14))
ggsave('./output/figureB5a.pdf', width=6, height=4.85)

ggplot(elections, aes(x=enop)) + 
  geom_histogram(color="black", fill="white", bins = 30) +
  scale_x_continuous(breaks = seq(0, 6, 1)) +
  xlab('effective number of candidates') +
  theme(axis.text.x = element_text(size=12), axis.title.x = element_text(size=14),
        axis.text.y = element_text(size=12), axis.title.y = element_text(size=14))
ggsave('./output/figureB5b.pdf', width=6, height=4.85)

ggplot(elections, aes(x=margin)) + 
  geom_histogram(color="black", fill="white", bins = 30) +
  scale_x_continuous(breaks = seq(0, 1,.1)) +
  xlab('victory margin') +
  theme(axis.text.x = element_text(size=12), axis.title.x = element_text(size=14),
        axis.text.y = element_text(size=12), axis.title.y = element_text(size=14))
ggsave('./output/figureB5c.pdf', width=6, height=4.85)


# Description of statistics in Appendix B.7.1: proportion of elections won by less than 50%/> 2 ENOP/Win margin under 20
elections[(vote_share_winner)<.5, .N]/elections[!is.na(vote_share_winner), .N]
elections[, summary(vote_share_winner)]
elections[(enop)>2, .N]/elections[!is.na(enop), .N]
elections[!is.infinite(enop), summary(enop)]
elections[(margin)<.2, .N]/elections[!is.na(margin), .N]



#### Compute Figure B.6: Sensitivity to the definition of majority-white/plurality-minority constituencies
## Compute RD estimates by iteratively redefining the ethnic minority population share used as threshold to define majority-white/plurality-minority constituencies  
# rename data
turnout <- data

# define vectors to store estimates
coef <- c()
se <- c()
p_value <- c()
cil <- c()
cir <- c()
mean_control <- c()
bw <- c()
nl <- c()
nr <- c()
Nl <- c()
Nr <- c()
Ml <- c()
Mr <- c()
th <- c()

# compute estimates for plurality-minority constituencies
for (j in seq(20, 35, 5)) {
  
  turnout_minority <- turnout[ethnicity_minority>=j]
  
  main_min_control <- rdrobust(turnout_minority[, turnout],
                               turnout_minority[, minority_victory_margin],
                               covs = turnout_minority[, covars, with = FALSE],
                               p = 1,
                               kernel = "triangular",
                               bwselect = "mserd")
  
  coef <- c(coef, main_min_control$coef[1,1])
  se <- c(se, main_min_control$se[1,1])
  p_value <- c(p_value, main_min_control$pv[3,1])
  cil <- c(cil, main_min_control$ci[3,1])
  cir <- c(cir, main_min_control$ci[3,2])
  mean_control <- c(mean_control, main_min_control$beta_p_l[1])
  bw <-  c(bw, main_min_control$bws[1])
  nl <- c(nl, main_min_control$N_h[1])
  nr <- c(nr, main_min_control$N_h[2])
  Nl <- c(Nl, main_min_control$N[1])
  Nr <- c(Nr, main_min_control$N[2])
  Ml <- c(Ml, main_min_control$M[1])
  Mr <- c(Mr, main_min_control$M[2])
  
}

# save estimates in data frame to compute plot
estimates_min <- data.frame(coef=coef,se=se,p_value=p_value,cil=cil,cir=cir,mean_control=mean_control,
                                    bw=bw,n=nl+nr,N=Nl+Nr, M=Ml+Mr,
                                    cov = rep(c('yes'), length(coef)),
                                    threshold = rep(seq(20, 35, 5), each=1),
                                    sample = rep(rep(c('plurality-minority'), each=1), length(seq(20,35,5))))

# define vectors to store estimates
coef <- c()
se <- c()
p_value <- c()
cil <- c()
cir <- c()
mean_control <- c()
bw <- c()
nl <- c()
nr <- c()
Nl <- c()
Nr <- c()
Ml <- c()
Mr <- c()
th <- c()

# compute estimates for majority-white constituencies
for (j in seq(20, 80, 5)) {
  
  turnout_majority <- turnout[ethnicity_minority<j]

  
  main_maj_control <- rdrobust(turnout_majority[, turnout],
                               turnout_majority[, minority_victory_margin],
                               covs = turnout_majority[, covars, with = FALSE],
                               p = 1,
                               kernel = "triangular",
                               bwselect = "mserd")
  
  coef <- c(coef, main_maj_control$coef[1,1])
  se <- c(se, main_maj_control$se[1,1])
  p_value <- c(p_value, main_maj_control$pv[3,1])
  cil <- c(cil, main_maj_control$ci[3,1])
  cir <- c(cir, main_maj_control$ci[3,2])
  mean_control <- c(mean_control,main_maj_control$beta_p_l[1])
  bw <-  c(bw, main_maj_control$bws[1])
  nl <- c(nl, main_maj_control$N_h[1])
  nr <- c(nr, main_maj_control$N_h[2])
  Nl <- c(Nl, main_maj_control$N[1])
  Nr <- c(Nr, main_maj_control$N[2])
  Ml <- c(Ml, main_maj_control$M[1])
  Mr <- c(Mr, main_maj_control$M[2])
  
}

# save estimates in data frame to compute plot
estimates_maj <- data.frame(coef=coef,se=se,p_value=p_value,cil=cil,cir=cir,mean_control=mean_control,
                                   bw=bw,n=nl+nr,N=Nl+Nr, M=Ml+Mr,
                                   cov = rep(c('yes'), length(coef)),
                                   threshold = rep(seq(20, 80, 5), each=1),
                                   sample = rep(c('majority-white'), length(seq(20,80,5))))

estimates <- rbind(estimates_min, estimates_maj)

setDT(estimates)

# compute plots
ggplot(estimates[sample=='majority-white' & cov=='yes']) +
  aes(x=threshold, y=coef) +
  geom_point(size=1) +
  geom_errorbar(aes(ymin=cil, ymax=cir), size=0.5, alpha=0.4) +
  scale_x_continuous(breaks=seq(20,80,5)) +
  geom_hline(aes(yintercept=0),
             linetype='dashed', colour='#666666', size=0.3) +
  ggtitle("Majority white") +
  theme(plot.title = element_text(size=14, lineheight=3),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_blank(), axis.line = element_line(colour = "black"),
        axis.title.x = element_text(size =14), axis.title.y = element_text(size =14),
        axis.text.x = element_text(size=10), axis.text.y = element_text(size = 10)) +
  xlab('Ethnic minority population threshold') +
  ylab('RD effect on turnout rate')
ggsave('./output/figureB6a.pdf', width=6, height=4.85)

ggplot(estimates[sample=='plurality-minority' & cov=='yes']) +
  aes(x=threshold, y=coef) +
  geom_point(size=1) +
  geom_errorbar(aes(ymin=cil, ymax=cir), size=0.5, alpha=0.4) +
  scale_x_continuous(breaks=seq(20,80,5)) +
  geom_hline(aes(yintercept=0),
             linetype='dashed', colour='#666666', size=0.3) +
  ggtitle("Plurality minority") +
  theme(plot.title = element_text(size=14, lineheight=3),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_blank(), axis.line = element_line(colour = "black"),
        axis.title.x = element_text(size =14), axis.title.y = element_text(size =14),
        axis.text.x = element_text(size=10), axis.text.y = element_text(size = 10)) +
  xlab('Ethnic minority population threshold') +
  ylab('RD effect on turnout rate')
ggsave('./output/figureB6b.pdf', width=6, height=4.85)



#### Compute Figure D.1: Effects on vote share of strongest opponent's
# define majority-white/plurality-minority data
turnout_majority <- turnout[ethnicity_minority<20]
turnout_minority <- turnout[ethnicity_minority>=20]

# compute RD estimate among majority-white constituencies and store optimal bandwidth size
main <- rdrobust(turnout_majority[, share_non_coethnic_opposition_t1],
                 turnout_majority[, minority_victory_margin],
                 kernel = "triangular", p = 1, bwselect = "mserd")

bws <- main$bws[1]
# compute and save plot within optimal bandwidth size
mainp <- rdplot(turnout_majority[(abs(minority_victory_margin) <= bws),
                                  share_non_coethnic_opposition_t1],
                turnout_majority[(abs(minority_victory_margin) <= bws),
                                  minority_victory_margin],
                p = 1,
                y.lim = c(0,70),
                x.label = "ethnic minority victory margin, t",
                y.label= "strongest opponent party's vote share, t+1", title = " ",
                col.lines = "#FF3300", col.dots = '#666666')

mainp$rdplot +
  ggtitle('Majority white') +
  theme(plot.title = element_text(size=18, lineheight=3),
        axis.text.x = element_text(size = 12),
        axis.title.x = element_text(size =14),
        axis.title.y = element_text(size =14),
        axis.text.y = element_text(size = 12))
ggsave(file = './output/figureD1a.pdf', width=6, height=4.85)

# compute RD estimate among plurality-minority constituencies and store optimal bandwidth size
main <- rdrobust(turnout_minority[, share_non_coethnic_opposition_t1],
                 turnout_minority[, minority_victory_margin],
                 kernel = "triangular", p = 1, bwselect = "mserd")

bws <- main$bws[1]
# compute and save plot within optimal bandwidth size
mainp <- rdplot(turnout_minority[(abs(minority_victory_margin) <= bws),
                                  share_non_coethnic_opposition_t1],
                turnout_minority[(abs(minority_victory_margin) <= bws),
                                  minority_victory_margin],
                p = 1,
                y.lim = c(0,70),
                x.label = "ethnic minority victory margin, t",
                y.label= "strongest opponent party's vote share, t+1", title = " ",
                col.lines = "#FF3300", col.dots = '#666666')

mainp$rdplot +
  ggtitle('Plurality minority') +
  theme(plot.title = element_text(size=18, lineheight=3),
        axis.text.x = element_text(size = 12),
        axis.title.x = element_text(size =14),
        axis.title.y = element_text(size =14),
        axis.text.y = element_text(size = 12))
ggsave(file = './output/figureD1b.pdf', width=6, height=4.85)

############# END ################
